support.S revision 302408
1/*- 2 * Copyright (c) 2001 Jake Burkholder. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <machine/asm.h> 28__FBSDID("$FreeBSD: stable/11/sys/sparc64/sparc64/support.S 285627 2015-07-16 10:46:52Z zbb $"); 29 30#include "opt_kstack_pages.h" 31 32#include <sys/errno.h> 33 34#include <machine/asi.h> 35#include <machine/asmacros.h> 36#include <machine/fsr.h> 37#include <machine/intr_machdep.h> 38#include <machine/pcb.h> 39#include <machine/pstate.h> 40#include <machine/wstate.h> 41 42#include "assym.s" 43 44 .register %g2, #ignore 45 .register %g3, #ignore 46 .register %g6, #ignore 47 48/* 49 * Common code for copy routines. 
50 * 51 * We use large macros to generate functions for each of the copy routines. 52 * This allows the load and store instructions to be generated for the right 53 * operation, asi or not. It is possible to write an asi independent function 54 * but this would require 2 expensive wrs in the main loop to switch %asi. 55 * It would also screw up profiling (if we ever get it), but may save some I$. 56 * We assume that either one of dasi and sasi is empty, or that they are both 57 * the same (empty or non-empty). It is up to the caller to set %asi. 58 */ 59 60/* 61 * ASI independent implementation of copystr(9). 62 * Used to implement copyinstr() and copystr(). 63 * 64 * Return value is in %g1. 65 */ 66#define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \ 67 brz len, 4f ; \ 68 mov src, %g2 ; \ 691: deccc 1, len ; \ 70 bl,a,pn %xcc, 3f ; \ 71 nop ; \ 72 LD(ub, sa) [src] sasi, %g1 ; \ 73 ST(b, da) %g1, [dst] dasi ; \ 74 brz,pn %g1, 3f ; \ 75 inc src ; \ 76 ba %xcc, 1b ; \ 77 inc dst ; \ 782: mov ENAMETOOLONG, %g1 ; \ 793: sub src, %g2, %g2 ; \ 80 brnz,a done, 4f ; \ 81 stx %g2, [done] ; \ 824: 83 84/* 85 * ASI independent implementation of memset(3). 86 * Used to implement bzero(), memset() and aszero(). 87 * 88 * If the pattern is non-zero, duplicate it to fill 64 bits. 89 * Store bytes until dst is 8-byte aligned, then store 8 bytes. 90 * It has yet to be determined how much unrolling is beneficial. 91 * Could also read and compare before writing to minimize snoop traffic. 92 * 93 * XXX bzero() should be implemented as 94 * #define bzero(dst, len) (void)memset((dst), 0, (len)) 95 * if at all. 
 */
/*
 * Register usage: %g1 is scratch for replicating the byte pattern across
 * all 64 bits.  On SPARC, a conditional branch with the ",a" (annul) bit
 * set cancels its delay-slot instruction when the branch is NOT taken, so
 * the "inc ... len" style delay slots below execute only on the taken
 * (loop-exit) path, undoing the over-decrement of len.
 */
#define	_MEMSET(dst, pat, len, da, dasi) \
	brlez,pn len, 5f ; \
	 and	pat, 0xff, pat ; \
	brz,pt	pat, 1f ; \
	 sllx	pat, 8, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 16, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 32, %g1 ; \
	or	pat, %g1, pat ; \
	.align 16 ; \
1:	deccc	1, len ; \
	bl,pn	%xcc, 5f ; \
	 btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	 inc	1, len ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 1b ; \
	 inc	dst ; \
	.align 16 ; \
2:	deccc	32, len ; \
	bl,a,pn	%xcc, 3f ; \
	 inc	32, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ST(x, da) pat, [dst + 8] dasi ; \
	ST(x, da) pat, [dst + 16] dasi ; \
	ST(x, da) pat, [dst + 24] dasi ; \
	ba	%xcc, 2b ; \
	 inc	32, dst ; \
	.align 16 ; \
3:	deccc	8, len ; \
	bl,a,pn	%xcc, 4f ; \
	 inc	8, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ba	%xcc, 3b ; \
	 inc	8, dst ; \
	.align 16 ; \
4:	deccc	1, len ; \
	bl,a,pn	%xcc, 5f ; \
	 nop ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 4b ; \
	 inc	1, dst ; \
5:

/*
 * ASI independent implementation of memcpy(3).
 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
 * ascopyfrom() and ascopyto().
 *
 * Transfer bytes until dst is 8-byte aligned.  If src is then also 8 byte
 * aligned, transfer 8 bytes, otherwise finish with bytes.  The unaligned
 * case could be optimized, but it is expected that this is the uncommon
 * case and of questionable value.  The code to do so is also rather large
 * and ugly.  It has yet to be determined how much unrolling is beneficial.
 *
 * XXX bcopy() must also check for overlap.  This is stupid.
 * XXX bcopy() should be implemented as
 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
 * if at all.
 */
#define	_MEMCPY(dst, src, len, da, dasi, sa, sasi) \
1:	deccc	1, len ; \
	bl,pn	%xcc, 6f ; \
	 btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	 inc	1, len ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	1, src ; \
	ba	%xcc, 1b ; \
	 inc	1, dst ; \
	.align 16 ; \
2:	btst	7, src ; \
	bz,a,pt	%xcc, 3f ; \
	 nop ; \
	ba,a	%xcc, 5f ; \
	.align 16 ; \
3:	deccc	32, len ; \
	bl,a,pn	%xcc, 4f ; \
	 inc	32, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	LD(x, sa) [src + 8] sasi, %g2 ; \
	LD(x, sa) [src + 16] sasi, %g3 ; \
	LD(x, sa) [src + 24] sasi, %g4 ; \
	ST(x, da) %g1, [dst] dasi ; \
	ST(x, da) %g2, [dst + 8] dasi ; \
	ST(x, da) %g3, [dst + 16] dasi ; \
	ST(x, da) %g4, [dst + 24] dasi ; \
	inc	32, src ; \
	ba	%xcc, 3b ; \
	 inc	32, dst ; \
	.align 16 ; \
4:	deccc	8, len ; \
	bl,a,pn	%xcc, 5f ; \
	 inc	8, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	ST(x, da) %g1, [dst] dasi ; \
	inc	8, src ; \
	ba	%xcc, 4b ; \
	 inc	8, dst ; \
	.align 16 ; \
5:	deccc	1, len ; \
	bl,a,pn	%xcc, 6f ; \
	 nop ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	src ; \
	ba	%xcc, 5b ; \
	 inc	dst ; \
6:

/*
 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
 *
 * Copy within a single alternate address space; %asi is set from the
 * caller-supplied asi and used for both the loads and the stores.
 */
ENTRY(ascopy)
	wr	%o0, 0, %asi
	_MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
	retl
	 nop
END(ascopy)

/*
 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
 *
 * Copy from an alternate address space to a normal kernel buffer.
 */
ENTRY(ascopyfrom)
	wr	%o0, 0, %asi
	_MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
	retl
	 nop
END(ascopyfrom)

/*
 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
 *
 * Copy from a normal kernel buffer to an alternate address space.
 */
ENTRY(ascopyto)
	wr	%o1, 0, %asi
	_MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
	retl
	 nop
END(ascopyto)

/*
 * void aszero(u_long asi, vm_offset_t pa, size_t len)
 */
ENTRY(aszero)
	wr	%o0, 0, %asi
	_MEMSET(%o1, %g0, %o2, a, %asi)
	retl
	 nop
END(aszero)

/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Byte-wise compare; returns 0 if all len bytes are equal, otherwise the
 * (non-zero) count of bytes remaining when the first mismatch was found.
 */
ENTRY(bcmp)
	brz,pn	%o2, 2f
	 clr	%o3
1:	ldub	[%o0 + %o3], %o4
	ldub	[%o1 + %o3], %o5
	cmp	%o4, %o5
	bne,pn	%xcc, 2f
	 inc	%o3
	deccc	%o2
	bne,pt	%xcc, 1b
	 nop
2:	retl
	 mov	%o2, %o0
END(bcmp)

/*
 * void bcopy(const void *src, void *dst, size_t len)
 */
ENTRY(bcopy)
	/*
	 * Check for overlap, and copy backwards if so.  The unsigned
	 * compare of (dst - src) against len covers both dst < src and
	 * non-overlapping dst > src in one test.
	 */
	sub	%o1, %o0, %g1
	cmp	%g1, %o2
	bgeu,a,pt %xcc, 3f
	 nop

	/*
	 * Copy backwards, one byte at a time from the end.
	 */
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
1:	deccc	1, %o2
	bl,a,pn	%xcc, 2f
	 nop
	dec	1, %o0
	ldub	[%o0], %g1
	dec	1, %o1
	ba	%xcc, 1b
	 stb	%g1, [%o1]
2:	retl
	 nop

	/*
	 * Do the fast version.
	 */
3:	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 nop
END(bcopy)

/*
 * void bzero(void *b, size_t len)
 */
ENTRY(bzero)
	_MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
	retl
	 nop
END(bzero)

/*
 * int copystr(const void *src, void *dst, size_t len, size_t *done)
 */
ENTRY(copystr)
	_COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 mov	%g1, %o0
END(copystr)

/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * dst is preserved in %o0 so it can be returned per the memcpy(3) contract.
 */
ENTRY(memcpy)
	mov	%o0, %o3
	_MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 nop
END(memcpy)

/*
 * void *memset(void *b, int c, size_t len)
 */
ENTRY(memset)
	mov	%o0, %o3
	_MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
	retl
	 nop
END(memset)

	/*
	 * Faults on user addresses between copy_nofault_begin and
	 * copy_nofault_end are presumably redirected to copy_fault by the
	 * trap handlers (not visible in this file) — confirm against the
	 * trap table code.
	 */
	.globl	copy_nofault_begin
copy_nofault_begin:
	nop

/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 */
ENTRY(copyin)
	wr	%g0, ASI_AIUP, %asi
	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
	retl
	 clr	%o0
END(copyin)

/*
 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
 */
ENTRY(copyinstr)
	wr	%g0, ASI_AIUP, %asi
	_COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
	retl
	 mov	%g1, %o0
END(copyinstr)

/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 */
ENTRY(copyout)
	wr	%g0, ASI_AIUP, %asi
	_MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
	retl
	 clr	%o0
END(copyout)

	.globl	copy_nofault_end
copy_nofault_end:
	nop

/* Landing pad for faults taken in the copy_nofault region: returns EFAULT. */
ENTRY(copy_fault)
	retl
	 mov	EFAULT, %o0
END(copy_fault)

	.globl	fs_nofault_begin
fs_nofault_begin:
	nop

/*
 * Chatty aliases for fetch, store functions.
 */
	.globl	fubyte, fusword, fuword, subyte, susword, suword
	.set	fubyte, fuword8
	.set	fusword, fuword16
	.set	fuword, fuword64
	.set	subyte, suword8
	.set	susword, suword16
	.set	suword, suword64

	.globl	casuword32, casuword, fuptr, suptr
	.set	casuword, casuword64
	.set	fuptr, fuword64
	.set	suptr, suword64

/*
 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
 *
 * Atomic compare-and-swap on a user-space word; returns the previous value.
 */
ENTRY(casuword32)
	casa	[%o0] ASI_AIUP, %o1, %o2
	retl
	 mov	%o2, %o0
END(casuword32)

/*
 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
 */
ENTRY(casuword64)
	casxa	[%o0] ASI_AIUP, %o1, %o2
	retl
	 mov	%o2, %o0
END(casuword64)

/*
 * int fuword8(const void *base)
 */
ENTRY(fuword8)
	retl
	 lduba	[%o0] ASI_AIUP, %o0
END(fuword8)

/*
 * int fuword16(const void *base)
 */
ENTRY(fuword16)
	retl
	 lduha	[%o0] ASI_AIUP, %o0
END(fuword16)

/*
 * int32_t fuword32(const void *base)
 */
ENTRY(fuword32)
	retl
	 lduwa	[%o0] ASI_AIUP, %o0
END(fuword32)

/*
 * int64_t fuword64(const void *base)
 */
ENTRY(fuword64)
	retl
	 ldxa	[%o0] ASI_AIUP, %o0
END(fuword64)

/*
 * int suword8(const void *base, int word)
 */
ENTRY(suword8)
	stba	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword8)

/*
 * int suword16(const void *base, int word)
 */
ENTRY(suword16)
	stha	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword16)

/*
 * int suword32(const void *base, int32_t word)
 */
ENTRY(suword32)
	stwa	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword32)

/*
 * int suword64(const void *base, int64_t word)
 */
ENTRY(suword64)
	stxa	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword64)

	/*
	 * The fs_nofault_intr range brackets the fetch/store variants that
	 * may be used from interrupt context; faults here are presumably
	 * handled specially by the trap code (not visible in this file).
	 */
	.globl	fs_nofault_intr_begin
fs_nofault_intr_begin:
	nop

/*
 * int fuswintr(const void *base)
 */
ENTRY(fuswintr)
	retl
	 lduha	[%o0] ASI_AIUP, %o0
END(fuswintr)

/*
 * int suswintr(const void *base, int word)
 */
ENTRY(suswintr)
	stha	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suswintr)

	.globl	fs_nofault_intr_end
fs_nofault_intr_end:
	nop

	.globl	fs_nofault_end
fs_nofault_end:
	nop

/* Landing pad for faults taken in the fs_nofault region: returns -1. */
ENTRY(fs_fault)
	retl
	 mov	-1, %o0
END(fs_fault)

	.globl	fas_nofault_begin
fas_nofault_begin:

/*
 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
 *
 * Fetch a byte from an arbitrary alternate address space, store the
 * result through the val pointer.  Returns 0 on success; faults land in
 * fas_fault (returns -1).
 */
ENTRY(fasword8)
	wr	%o0, 0, %asi
	membar	#Sync
	lduba	[%o1] %asi, %o3
	membar	#Sync
	stb	%o3, [%o2]
	retl
	 clr	%o0
END(fasword8)

/*
 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
 */
ENTRY(fasword16)
	wr	%o0, 0, %asi
	membar	#Sync
	lduha	[%o1] %asi, %o3
	membar	#Sync
	sth	%o3, [%o2]
	retl
	 clr	%o0
END(fasword16)

/*
 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
 */
ENTRY(fasword32)
	wr	%o0, 0, %asi
	membar	#Sync
	lduwa	[%o1] %asi, %o3
	membar	#Sync
	stw	%o3, [%o2]
	retl
	 clr	%o0
END(fasword32)

	.globl	fas_nofault_end
fas_nofault_end:
	nop

	.globl	fas_fault
ENTRY(fas_fault)
	retl
	 mov	-1, %o0
END(fas_fault)

	.globl	fpu_fault_begin
fpu_fault_begin:
	nop

/*
 * void spitfire_block_copy(void *src, void *dst, size_t len)
 *
 * Block copy using the FPU block load/store ASIs.  If the trapframe shows
 * user FP state live (TF_FPRS has FPRS_FEF set), the user's FP registers
 * are first spilled to the pcb and PCB_FEF is flagged so they can be
 * restored later.  The main loop software-pipelines: while one block is
 * being stored from %f32-%f46, the next is loaded into the alternate
 * register bank.
 */
ENTRY(spitfire_block_copy)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs

	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	ldda	[%o0] %asi, %f0
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2

2:	ldda	[%o0] %asi, %f16
	fsrc1	%f0, %f32
	fsrc1	%f2, %f34
	fsrc1	%f4, %f36
	fsrc1	%f6, %f38
	fsrc1	%f8, %f40
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	subcc	%o2, VIS_BLOCKSIZE, %o2
	bz,pn	%xcc, 3f
	 add	%o1, VIS_BLOCKSIZE, %o1
	ldda	[%o0] %asi, %f0
	fsrc1	%f16, %f32
	fsrc1	%f18, %f34
	fsrc1	%f20, %f36
	fsrc1	%f22, %f38
	fsrc1	%f24, %f40
	fsrc1	%f26, %f42
	fsrc1	%f28, %f44
	fsrc1	%f30, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2
	ba,pt	%xcc, 2b
	 add	%o1, VIS_BLOCKSIZE, %o1

3:	membar	#Sync

	stda	%f16, [%o1] %asi
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs
END(spitfire_block_copy)

/*
 * void zeus_block_copy(void *src, void *dst, size_t len)
 *
 * Same contract as spitfire_block_copy, with a schedule of explicit
 * prefetches and 8-byte ldd loads interleaved with the FP moves.
 */
ENTRY(zeus_block_copy)
	prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0

	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs

	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	ldd	[%o0 + (0 * 8)], %f0
	prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
	ldd	[%o0 + (1 * 8)], %f2
	prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
	fmovd	%f0, %f32
	ldd	[%o0 + (2 * 8)], %f4
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	fmovd	%f2, %f34
	ldd	[%o0 + (3 * 8)], %f6
	prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
	fmovd	%f4, %f36
	ldd	[%o0 + (4 * 8)], %f8
	prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
	fmovd	%f6, %f38
	ldd	[%o0 + (5 * 8)], %f10
	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
	fmovd	%f8, %f40
	ldd	[%o0 + (6 * 8)], %f12
	prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
	fmovd	%f10, %f42
	ldd	[%o0 + (7 * 8)], %f14
	ldd	[%o0 + (8 * 8)], %f0
	sub	%o2, VIS_BLOCKSIZE, %o2
	add	%o0, VIS_BLOCKSIZE, %o0
	prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
	ba,pt	%xcc, 2f
	 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
	.align	32

2:	ldd	[%o0 + (1 * 8)], %f2
	fmovd	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fmovd	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fmovd	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fmovd	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fmovd	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fmovd	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fmovd	%f8, %f40
	ldd	[%o0 + (8 * 8)], %f0
	fmovd	%f10, %f42
	sub	%o2, VIS_BLOCKSIZE, %o2
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	add	%o1, VIS_BLOCKSIZE, %o1
	prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
	add	%o0, VIS_BLOCKSIZE, %o0
	cmp	%o2, VIS_BLOCKSIZE + 8
	bgu,pt	%xcc, 2b
	 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
	ldd	[%o0 + (1 * 8)], %f2
	fsrc1	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fsrc1	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fsrc1	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fsrc1	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fsrc1	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fsrc1	%f8, %f40
	add	%o1, VIS_BLOCKSIZE, %o1
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs
END(zeus_block_copy)

/*
 * void spitfire_block_zero(void *dst, size_t len)
 * void zeus_block_zero(void *dst, size_t len)
 *
 * Zero len bytes (a multiple of 4 * VIS_BLOCKSIZE, as the loop structure
 * shows) using FP block stores of a zeroed register bank.  Shares the
 * user-FP-state spill preamble with the block copy routines.
 */
ALTENTRY(zeus_block_zero)
ENTRY(spitfire_block_zero)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs

	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	fzero	%f0
	fzero	%f2
	fzero	%f4
	fzero	%f6
	fzero	%f8
	fzero	%f10
	fzero	%f12
	fzero	%f14

1:	stda	%f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
	sub	%o1, (4 * VIS_BLOCKSIZE), %o1
	brnz,pt	%o1, 1b
	 add	%o0, (4 * VIS_BLOCKSIZE), %o0
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs
END(spitfire_block_zero)

	.globl	fpu_fault_end
fpu_fault_end:
	nop

	.globl	fpu_fault_size
	.set	fpu_fault_size, fpu_fault_end - fpu_fault_begin

/*
 * longjmp(jmp_buf in %o0, return value in %o1; a zero %o1 is promoted
 * to 1 via the movrz).  Unwinds register windows with restore until %fp
 * matches the saved _JB_FP, then returns to the saved _JB_PC.
 */
ENTRY(longjmp)
	set	1, %g3
	movrz	%o1, %o1, %g3
	mov	%o0, %g1
	ldx	[%g1 + _JB_FP], %g2
1:	cmp	%fp, %g2
	bl,a,pt	%xcc, 1b
	 restore
	bne,pn	%xcc, 2f
	 ldx	[%g1 + _JB_SP], %o2
	cmp	%o2, %sp
	blt,pn	%xcc, 2f
	 movge	%xcc, %o2, %sp
	ldx	[%g1 + _JB_PC], %o7
	retl
	 mov	%g3, %o0
2:	PANIC("longjmp botch", %l1)
END(longjmp)

/*
 * setjmp: save %sp, return address (%o7) and %fp in the jmp_buf at %o0;
 * returns 0.
 */
ENTRY(setjmp)
	stx	%sp, [%o0 + _JB_SP]
	stx	%o7, [%o0 + _JB_PC]
	stx	%fp, [%o0 + _JB_FP]
	retl
	 clr	%o0
END(setjmp)

/*
 * void ofw_entry(cell_t args[])
 *
 * Call into Open Firmware through the vector in ofw_vec, with address
 * masking and interrupts disabled, switching window state for the PROM
 * when the kernel trap table has taken over (tba_taken_over).
 */
ENTRY(ofw_entry)
	save	%sp, -CCFSZ, %sp
	SET(ofw_vec, %l7, %l6)
	ldx	[%l6], %l6
	rdpr	%pstate, %l7
	andn	%l7, PSTATE_AM | PSTATE_IE, %l5
	wrpr	%l5, 0, %pstate
	SET(tba_taken_over, %l5, %l4)
	brz,pn	%l4, 1f
	 rdpr	%wstate, %l5
	andn	%l5, WSTATE_PROM_MASK, %l3
	wrpr	%l3, WSTATE_PROM_KMIX, %wstate
1:	call	%l6
	 mov	%i0, %o0
	brz,pn	%l4, 1f
	 nop
	wrpr	%g0, %l5, %wstate
1:	wrpr	%l7, 0, %pstate
	ret
	 restore %o0, %g0, %o0
END(ofw_entry)

/*
 * void ofw_exit(cell_t args[])
 *
 * Hand control back to Open Firmware permanently: restore the OFW trap
 * table, set up a stack in kstack0, reset the DMMU context and trap
 * level, then call the OFW vector.  Does not return.
 */
ENTRY(ofw_exit)
	save	%sp, -CCFSZ, %sp
	flushw
	SET(ofw_tba, %l7, %l5)
	ldx	[%l5], %l5
	rdpr	%pstate, %l7
	andn	%l7, PSTATE_AM | PSTATE_IE, %l7
	wrpr	%l7, 0, %pstate
	rdpr	%wstate, %l7
	andn	%l7, WSTATE_PROM_MASK, %l7
	wrpr	%l7, WSTATE_PROM_KMIX, %wstate
	wrpr	%l5, 0, %tba			! restore the OFW trap table
	SET(ofw_vec, %l7, %l6)
	ldx	[%l6], %l6
	SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
	sub	%l0, SPOFF, %fp			! setup a stack in a locked page
	sub	%l0, SPOFF + CCFSZ, %sp
	mov	AA_DMMU_PCXR, %l3		! force primary DMMU context 0
	sethi	%hi(KERNBASE), %l5
	stxa	%g0, [%l3] ASI_DMMU
	flush	%l5
	wrpr	%g0, 0, %tl			! force trap level 0
	call	%l6
	 mov	%i0, %o0
	! never to return
END(ofw_exit)

#ifdef GPROF

ENTRY(user)
	nop

ENTRY(btrap)
	nop

ENTRY(etrap)
	nop

ENTRY(bintr)
	nop

ENTRY(eintr)
	nop

/*
 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
 * badness.
 */
#define	GM_STATE	0x0
#define	GMON_PROF_OFF	3
#define	GMON_PROF_HIRES	4

	.globl	_mcount
	.set	_mcount, __cyg_profile_func_enter

/* Tail-call into mcount unless profiling is switched off. */
ENTRY(__cyg_profile_func_enter)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_OFF
	be,a,pn	%icc, 1f
	 nop
	SET(mcount, %o3, %o2)
	jmpl	%o2, %g0
	 nop
1:	retl
	 nop
END(__cyg_profile_func_enter)

#ifdef GUPROF

/* Tail-call into mexitcount when high-resolution profiling is enabled. */
ENTRY(__cyg_profile_func_exit)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_HIRES
	be,a,pn	%icc, 1f
	 nop
	SET(mexitcount, %o3, %o2)
	jmpl	%o2, %g0
	 nop
1:	retl
	 nop
END(__cyg_profile_func_exit)

#endif /* GUPROF */

#endif /* GPROF */