1/*-
2 * Copyright (c) 2001 Jake Burkholder.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <machine/asm.h>
28__FBSDID("$FreeBSD$");
29
30#include "opt_kstack_pages.h"
31
32#include <sys/errno.h>
33
34#include <machine/asi.h>
35#include <machine/asmacros.h>
36#include <machine/fsr.h>
37#include <machine/intr_machdep.h>
38#include <machine/pcb.h>
39#include <machine/pstate.h>
40#include <machine/wstate.h>
41
42#include "assym.s"
43
44	.register %g2, #ignore
45	.register %g3, #ignore
46	.register %g6, #ignore
47
48/*
49 * Common code for copy routines.
50 *
51 * We use large macros to generate functions for each of the copy routines.
52 * This allows the load and store instructions to be generated for the right
53 * operation, asi or not.  It is possible to write an asi independent function
54 * but this would require 2 expensive wrs in the main loop to switch %asi.
55 * It would also screw up profiling (if we ever get it), but may save some I$.
56 * We assume that either one of dasi and sasi is empty, or that they are both
57 * the same (empty or non-empty).  It is up to the caller to set %asi.
58 */
59
60/*
61 * ASI independent implementation of copystr(9).
62 * Used to implement copyinstr() and copystr().
63 *
64 * Return value is in %g1.
65 */
66#define	_COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
67	brz	len, 4f ; \
68	 mov	src, %g2 ; \
691:	deccc	1, len ; \
70	bl,a,pn	%xcc, 3f ; \
71	 nop ; \
72	LD(ub, sa) [src] sasi, %g1 ; \
73	ST(b, da) %g1, [dst] dasi ; \
74	brz,pn	%g1, 3f ; \
75	 inc	src ; \
76	ba	%xcc, 1b ; \
77	 inc	dst ; \
782:	mov	ENAMETOOLONG, %g1 ; \
793:	sub	src, %g2, %g2 ; \
80	brnz,a	done, 4f ; \
81	 stx	%g2, [done] ; \
824:
83
84/*
85 * ASI independent implementation of memset(3).
86 * Used to implement bzero(), memset() and aszero().
87 *
88 * If the pattern is non-zero, duplicate it to fill 64 bits.
89 * Store bytes until dst is 8-byte aligned, then store 8 bytes.
90 * It has yet to be determined how much unrolling is beneficial.
91 * Could also read and compare before writing to minimize snoop traffic.
92 *
93 * XXX bzero() should be implemented as
94 * #define bzero(dst, len) (void)memset((dst), 0, (len))
95 * if at all.
96 */
97#define	_MEMSET(dst, pat, len, da, dasi) \
98	brlez,pn len, 5f ; \
99	 and	pat, 0xff, pat ; \
100	brz,pt	pat, 1f ; \
101	 sllx	pat, 8, %g1 ; \
102	or	pat, %g1, pat ; \
103	sllx	pat, 16, %g1 ; \
104	or	pat, %g1, pat ; \
105	sllx	pat, 32, %g1 ; \
106	or	pat, %g1, pat ; \
107	.align 16 ; \
1081:	deccc	1, len ; \
109	bl,pn	%xcc, 5f ; \
110	 btst	7, dst ; \
111	bz,a,pt	%xcc, 2f ; \
112	 inc	1, len ; \
113	ST(b, da) pat, [dst] dasi ; \
114	ba	%xcc, 1b ; \
115	 inc	dst ; \
116	.align 16 ; \
1172:	deccc	32, len ; \
118	bl,a,pn	%xcc, 3f ; \
119	 inc	32, len ; \
120	ST(x, da) pat, [dst] dasi ; \
121	ST(x, da) pat, [dst + 8] dasi ; \
122	ST(x, da) pat, [dst + 16] dasi ; \
123	ST(x, da) pat, [dst + 24] dasi ; \
124	ba	%xcc, 2b ; \
125	 inc	32, dst ; \
126	.align 16 ; \
1273:	deccc	8, len ; \
128	bl,a,pn	%xcc, 4f ; \
129	 inc	8, len ; \
130	ST(x, da) pat, [dst] dasi ; \
131	ba	%xcc, 3b ; \
132	 inc	8, dst ; \
133	.align 16 ; \
1344:	deccc	1, len ; \
135	bl,a,pn	%xcc, 5f ; \
136	 nop ; \
137	ST(b, da) pat, [dst] dasi ; \
138	ba	%xcc, 4b ; \
139	 inc	1, dst ; \
1405:
141
142/*
143 * ASI independent implementation of memcpy(3).
144 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
145 * ascopyfrom() and ascopyto().
146 *
147 * Transfer bytes until dst is 8-byte aligned.  If src is then also 8 byte
148 * aligned, transfer 8 bytes, otherwise finish with bytes.  The unaligned
149 * case could be optimized, but it is expected that this is the uncommon
150 * case and of questionable value.  The code to do so is also rather large
151 * and ugly.  It has yet to be determined how much unrolling is beneficial.
152 *
153 * XXX bcopy() must also check for overlap.  This is stupid.
154 * XXX bcopy() should be implemented as
155 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
156 * if at all.
157 */
158#define	_MEMCPY(dst, src, len, da, dasi, sa, sasi) \
1591:	deccc	1, len ; \
160	bl,pn	%xcc, 6f ; \
161	 btst	7, dst ; \
162	bz,a,pt	%xcc, 2f ; \
163	 inc	1, len ; \
164	LD(ub, sa) [src] sasi, %g1 ; \
165	ST(b, da) %g1, [dst] dasi ; \
166	inc	1, src ; \
167	ba	%xcc, 1b ; \
168	 inc	1, dst ; \
169	.align 16 ; \
1702:	btst	7, src ; \
171	bz,a,pt	%xcc, 3f ; \
172	 nop ; \
173	ba,a	%xcc, 5f ; \
174	.align 16 ; \
1753:	deccc	32, len ; \
176	bl,a,pn	%xcc, 4f ; \
177	 inc	32, len ; \
178	LD(x, sa) [src] sasi, %g1 ; \
179	LD(x, sa) [src + 8] sasi, %g2 ; \
180	LD(x, sa) [src + 16] sasi, %g3 ; \
181	LD(x, sa) [src + 24] sasi, %g4 ; \
182	ST(x, da) %g1, [dst] dasi ; \
183	ST(x, da) %g2, [dst + 8] dasi ; \
184	ST(x, da) %g3, [dst + 16] dasi ; \
185	ST(x, da) %g4, [dst + 24] dasi ; \
186	inc	32, src ; \
187	ba	%xcc, 3b ; \
188	 inc	32, dst ; \
189	.align 16 ; \
1904:	deccc	8, len ; \
191	bl,a,pn	%xcc, 5f ; \
192	 inc	8, len ; \
193	LD(x, sa) [src] sasi, %g1 ; \
194	ST(x, da) %g1, [dst] dasi ; \
195	inc	8, src ; \
196	ba	%xcc, 4b ; \
197	 inc	8, dst ; \
198	.align 16 ; \
1995:	deccc	1, len ; \
200	bl,a,pn	%xcc, 6f ; \
201	 nop ; \
202	LD(ub, sa) [src] sasi, %g1 ; \
203	ST(b, da) %g1, [dst] dasi ; \
204	inc	src ; \
205	ba	%xcc, 5b ; \
206	 inc	dst ; \
2076:
208
209/*
210 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
211 */
212ENTRY(ascopy)
213	wr	%o0, 0, %asi
214	_MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
215	retl
216	 nop
217END(ascopy)
218
219/*
220 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
221 */
222ENTRY(ascopyfrom)
223	wr	%o0, 0, %asi
224	_MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
225	retl
226	 nop
227END(ascopyfrom)
228
229/*
230 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
231 */
232ENTRY(ascopyto)
233	wr	%o1, 0, %asi
234	_MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
235	retl
236	 nop
237END(ascopyto)
238
239/*
240 * void aszero(u_long asi, vm_offset_t pa, size_t len)
241 */
242ENTRY(aszero)
243	wr	%o0, 0, %asi
244	_MEMSET(%o1, %g0, %o2, a, %asi)
245	retl
246	 nop
247END(aszero)
248
249/*
250 * int bcmp(const void *b1, const void *b2, size_t len)
251 */
252ENTRY(bcmp)
253	brz,pn	%o2, 2f
254	 clr	%o3
2551:	ldub	[%o0 + %o3], %o4
256	ldub	[%o1 + %o3], %o5
257	cmp	%o4, %o5
258	bne,pn	%xcc, 2f
259	 inc	%o3
260	deccc	%o2
261	bne,pt	%xcc, 1b
262	 nop
2632:	retl
264	 mov	%o2, %o0
265END(bcmp)
266
267/*
268 * void bcopy(const void *src, void *dst, size_t len)
269 */
270ENTRY(bcopy)
271	/*
272	 * Check for overlap, and copy backwards if so.
273	 */
274	sub	%o1, %o0, %g1
275	cmp	%g1, %o2
276	bgeu,a,pt %xcc, 3f
277	 nop
278
279	/*
280	 * Copy backwards.
281	 */
282	add	%o0, %o2, %o0
283	add	%o1, %o2, %o1
2841:	deccc	1, %o2
285	bl,a,pn	%xcc, 2f
286	 nop
287	dec	1, %o0
288	ldub	[%o0], %g1
289	dec	1, %o1
290	ba	%xcc, 1b
291	 stb	%g1, [%o1]
2922:	retl
293	 nop
294
295	/*
296	 * Do the fast version.
297	 */
2983:	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
299	retl
300	 nop
301END(bcopy)
302
303/*
304 * void bzero(void *b, size_t len)
305 */
306ENTRY(bzero)
307	_MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
308	retl
309	 nop
310END(bzero)
311
312/*
313 * int copystr(const void *src, void *dst, size_t len, size_t *done)
314 */
315ENTRY(copystr)
316	_COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
317	retl
318	 mov	%g1, %o0
319END(copystr)
320
321/*
322 * void *memcpy(void *dst, const void *src, size_t len)
323 */
324ENTRY(memcpy)
325	mov	%o0, %o3
326	_MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
327	retl
328	 nop
329END(memcpy)
330
331/*
332 * void *memset(void *b, int c, size_t len)
333 */
334ENTRY(memset)
335	mov	%o0, %o3
336	_MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
337	retl
338	 nop
339END(memset)
340
	/*
	 * copy_nofault_begin/copy_nofault_end bracket the user copy
	 * routines; faults in this range are presumably redirected to
	 * copy_fault by the trap code (see that label below) — confirm
	 * against the trap handlers.
	 */
	.globl	copy_nofault_begin
copy_nofault_begin:
	nop

/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Copy from user space (via ASI_AIUP) to kernel space.
 * Returns 0 on success.
 */
ENTRY(copyin)
	wr	%g0, ASI_AIUP, %asi	! source is user primary ASI
	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
	retl
	 clr	%o0
END(copyin)
354
355/*
356 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
357 */
358ENTRY(copyinstr)
359	wr	%g0, ASI_AIUP, %asi
360	_COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
361	retl
362	 mov	%g1, %o0
363END(copyinstr)
364
365/*
366 * int copyout(const void *kaddr, void *uaddr, size_t len)
367 */
368ENTRY(copyout)
369	wr	%g0, ASI_AIUP, %asi
370	_MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
371	retl
372	 clr	%o0
373END(copyout)
374
375	.globl	copy_nofault_end
376copy_nofault_end:
377	nop
378
/*
 * Recovery point for faults taken inside the copy_nofault region:
 * abort the copy and return EFAULT to the caller.
 */
ENTRY(copy_fault)
	retl
	 mov	EFAULT, %o0
END(copy_fault)
383
	/*
	 * fs_nofault_begin/fs_nofault_end bracket the fetch/store
	 * routines; faults here are presumably redirected to fs_fault
	 * (returns -1) by the trap code — confirm against the trap
	 * handlers.
	 */
	.globl	fs_nofault_begin
fs_nofault_begin:
	nop

/*
 * Chatty aliases for fetch, store functions.
 */
	.globl	fubyte, fusword, fuword, subyte, susword, suword
	.set	fubyte, fuword8
	.set	fusword, fuword16
	.set	fuword, fuword64
	.set	subyte, suword8
	.set	susword, suword16
	.set	suword, suword64

	! Pointer-sized variants: pointers are 64 bits on sparc64.
	.globl	casuword32, casuword, fuptr, suptr
	.set	casuword, casuword64
	.set	fuptr, fuword64
	.set	suptr, suword64
403
404/*
405 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
406 */
407ENTRY(casuword32)
408	casa	[%o0] ASI_AIUP, %o1, %o2
409	retl
410	 mov	%o2, %o0
411END(casuword32)
412
413/*
414 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
415 */
416ENTRY(casuword64)
417	casxa	[%o0] ASI_AIUP, %o1, %o2
418	retl
419	 mov	%o2, %o0
420END(casuword64)
421
422/*
423 * int fuword8(const void *base)
424 */
425ENTRY(fuword8)
426	retl
427	 lduba	[%o0] ASI_AIUP, %o0
428END(fuword8)
429
430/*
431 * int fuword16(const void *base)
432 */
433ENTRY(fuword16)
434	retl
435	 lduha	[%o0] ASI_AIUP, %o0
436END(fuword16)
437
438/*
439 * int32_t fuword32(const void *base)
440 */
441ENTRY(fuword32)
442	retl
443	 lduwa	[%o0] ASI_AIUP, %o0
444END(fuword32)
445
446/*
447 * int64_t fuword64(const void *base)
448 */
449ENTRY(fuword64)
450	retl
451	 ldxa	[%o0] ASI_AIUP, %o0
452END(fuword64)
453
454/*
455 * int suword8(const void *base, int word)
456 */
457ENTRY(suword8)
458	stba	%o1, [%o0] ASI_AIUP
459	retl
460	 clr	%o0
461END(suword8)
462
463/*
464 * int suword16(const void *base, int word)
465 */
466ENTRY(suword16)
467	stha	%o1, [%o0] ASI_AIUP
468	retl
469	 clr	%o0
470END(suword16)
471
472/*
473 * int suword32(const void *base, int32_t word)
474 */
475ENTRY(suword32)
476	stwa	%o1, [%o0] ASI_AIUP
477	retl
478	 clr	%o0
479END(suword32)
480
481/*
482 * int suword64(const void *base, int64_t word)
483 */
484ENTRY(suword64)
485	stxa	%o1, [%o0] ASI_AIUP
486	retl
487	 clr	%o0
488END(suword64)
489
	/*
	 * fs_nofault_intr_begin/_end bracket the variants that may be
	 * called from interrupt context; the trap code presumably
	 * distinguishes this sub-range — confirm against the handlers.
	 */
	.globl	fs_nofault_intr_begin
fs_nofault_intr_begin:
	nop

/*
 * int fuswintr(const void *base)
 *
 * Fetch a user halfword; interrupt-context-safe variant of fuword16.
 */
ENTRY(fuswintr)
	retl
	 lduha	[%o0] ASI_AIUP, %o0
END(fuswintr)
501
502/*
503 * int suswintr(const void *base, int word)
504 */
505ENTRY(suswintr)
506	stha	%o1, [%o0] ASI_AIUP
507	retl
508	 clr	%o0
509END(suswintr)
510
511	.globl	fs_nofault_intr_end
512fs_nofault_intr_end:
513	nop
514
515	.globl	fs_nofault_end
516fs_nofault_end:
517	nop
518
/*
 * Recovery point for faults taken inside the fs_nofault region:
 * the fetch/store routines report failure as -1.
 */
ENTRY(fs_fault)
	retl
	 mov	-1, %o0
END(fs_fault)
523
	! Fault-recoverable region for the fasword* routines (see fas_fault).
	.globl	fas_nofault_begin
fas_nofault_begin:

/*
 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
 *
 * Fetch a byte through an arbitrary ASI and store it through *val.
 * Returns 0 on success, -1 via fas_fault on a fault.
 */
ENTRY(fasword8)
	wr	%o0, 0, %asi
	membar	#Sync			! order around the alternate-space load
	lduba	[%o1] %asi, %o3
	membar	#Sync
	stb	%o3, [%o2]
	retl
	 clr	%o0
END(fasword8)
539
540/*
541 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
542 */
543ENTRY(fasword16)
544	wr	%o0, 0, %asi
545	membar	#Sync
546	lduha	[%o1] %asi, %o3
547	membar	#Sync
548	sth	%o3, [%o2]
549	retl
550	 clr	%o0
551END(fasword16)
552
553/*
554 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
555 */
556ENTRY(fasword32)
557	wr	%o0, 0, %asi
558	membar	#Sync
559	lduwa	[%o1] %asi, %o3
560	membar	#Sync
561	stw	%o3, [%o2]
562	retl
563	 clr	%o0
564END(fasword32)
565
	.globl	fas_nofault_end
fas_nofault_end:
	nop

/*
 * Recovery point for faults in the fas_nofault region: fasword*
 * report failure as -1.
 */
	.globl	fas_fault
ENTRY(fas_fault)
	retl
	 mov	-1, %o0
END(fas_fault)
575
	/*
	 * fpu_fault_begin/fpu_fault_end bracket the FPU block copy/zero
	 * routines, which use the FP register file as a copy buffer.
	 */
	.globl	fpu_fault_begin
fpu_fault_begin:
	nop

/*
 * void spitfire_block_copy(void *src, void *dst, size_t len)
 *
 * 64-byte block copy using ASI_BLK_S ldda/stda and %f0-%f62 as a
 * double-buffered staging area.  %o3 holds the saved %pstate, %o4/%o5
 * are scratch for the trapframe/pcb bookkeeping.
 *
 * NOTE(review): len is consumed in VIS_BLOCKSIZE steps with an exact
 * subcc-to-zero exit, so callers appear to guarantee len is a non-zero
 * multiple of VIS_BLOCKSIZE — confirm at the call sites.
 */
ENTRY(spitfire_block_copy)
	rdpr	%pstate, %o3			! save %pstate
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs		! enable the FPU

	/*
	 * If the interrupted context had live FP state (FPRS_FEF set in
	 * its trapframe), spill %f0-%f62 to the pcb's user FP save area
	 * and flag the pcb so the state is restored later.
	 */
	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5		! FP state no longer live in regs
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4		! mark pcb: restore FP on return
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate			! restore original %pstate

	! Prime the pipeline with the first source block in %f0-%f14.
	ldda	[%o0] %asi, %f0
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2

	/*
	 * Main loop: load the next block into the idle half of the
	 * register file (%f16.. or %f0..) while copying the previous
	 * block into %f32-%f46 and block-storing it.
	 */
2:	ldda	[%o0] %asi, %f16
	fsrc1	%f0, %f32
	fsrc1	%f2, %f34
	fsrc1	%f4, %f36
	fsrc1	%f6, %f38
	fsrc1	%f8, %f40
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	subcc	%o2, VIS_BLOCKSIZE, %o2
	bz,pn	%xcc, 3f			! last block is in %f16-%f30
	 add	%o1, VIS_BLOCKSIZE, %o1
	ldda	[%o0] %asi, %f0
	fsrc1	%f16, %f32
	fsrc1	%f18, %f34
	fsrc1	%f20, %f36
	fsrc1	%f22, %f38
	fsrc1	%f24, %f40
	fsrc1	%f26, %f42
	fsrc1	%f28, %f44
	fsrc1	%f30, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2
	ba,pt	%xcc, 2b
	 add	%o1, VIS_BLOCKSIZE, %o1

3:	membar	#Sync

	stda	%f16, [%o1] %asi		! drain the final block
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs			! disable the FPU again
END(spitfire_block_copy)
650
651/*
652 * void zeus_block_copy(void *src, void *dst, size_t len)
653 */
654ENTRY(zeus_block_copy)
655	prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
656
657	rdpr	%pstate, %o3
658	wrpr	%g0, PSTATE_NORMAL, %pstate
659
660	wr	%g0, ASI_BLK_S, %asi
661	wr	%g0, FPRS_FEF, %fprs
662
663	sub	PCB_REG, TF_SIZEOF, %o4
664	ldx	[%o4 + TF_FPRS], %o5
665	andcc	%o5, FPRS_FEF, %g0
666	bz,a,pt	%xcc, 1f
667	 nop
668	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
669	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
670	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
671	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
672	membar	#Sync
673
674	andn	%o5, FPRS_FEF, %o5
675	stx	%o5, [%o4 + TF_FPRS]
676	ldx	[PCB_REG + PCB_FLAGS], %o4
677	or	%o4, PCB_FEF, %o4
678	stx	%o4, [PCB_REG + PCB_FLAGS]
679
6801:	wrpr	%o3, 0, %pstate
681
682	ldd	[%o0 + (0 * 8)], %f0
683	prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
684	ldd	[%o0 + (1 * 8)], %f2
685	prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
686	fmovd	%f0, %f32
687	ldd	[%o0 + (2 * 8)], %f4
688	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
689	fmovd	%f2, %f34
690	ldd	[%o0 + (3 * 8)], %f6
691	prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
692	fmovd	%f4, %f36
693	ldd	[%o0 + (4 * 8)], %f8
694	prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
695	fmovd	%f6, %f38
696	ldd	[%o0 + (5 * 8)], %f10
697	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
698	fmovd	%f8, %f40
699	ldd	[%o0 + (6 * 8)], %f12
700	prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
701	fmovd	%f10, %f42
702	ldd	[%o0 + (7 * 8)], %f14
703	ldd	[%o0 + (8 * 8)], %f0
704	sub	%o2, VIS_BLOCKSIZE, %o2
705	add	%o0, VIS_BLOCKSIZE, %o0
706	prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
707	ba,pt	%xcc, 2f
708	 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
709	.align	32
710
7112:	ldd	[%o0 + (1 * 8)], %f2
712	fmovd	%f12, %f44
713	ldd	[%o0 + (2 * 8)], %f4
714	fmovd	%f14, %f46
715	stda	%f32, [%o1] %asi
716	ldd	[%o0 + (3 * 8)], %f6
717	fmovd	%f0, %f32
718	ldd	[%o0 + (4 * 8)], %f8
719	fmovd	%f2, %f34
720	ldd	[%o0 + (5 * 8)], %f10
721	fmovd	%f4, %f36
722	ldd	[%o0 + (6 * 8)], %f12
723	fmovd	%f6, %f38
724	ldd	[%o0 + (7 * 8)], %f14
725	fmovd	%f8, %f40
726	ldd	[%o0 + (8 * 8)], %f0
727	fmovd	%f10, %f42
728	sub	%o2, VIS_BLOCKSIZE, %o2
729	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
730	add	%o1, VIS_BLOCKSIZE, %o1
731	prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
732	add	%o0, VIS_BLOCKSIZE, %o0
733	cmp	%o2, VIS_BLOCKSIZE + 8
734	bgu,pt	%xcc, 2b
735	 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
736	ldd	[%o0 + (1 * 8)], %f2
737	fsrc1	%f12, %f44
738	ldd	[%o0 + (2 * 8)], %f4
739	fsrc1	%f14, %f46
740	stda	%f32, [%o1] %asi
741	ldd	[%o0 + (3 * 8)], %f6
742	fsrc1	%f0, %f32
743	ldd	[%o0 + (4 * 8)], %f8
744	fsrc1	%f2, %f34
745	ldd	[%o0 + (5 * 8)], %f10
746	fsrc1	%f4, %f36
747	ldd	[%o0 + (6 * 8)], %f12
748	fsrc1	%f6, %f38
749	ldd	[%o0 + (7 * 8)], %f14
750	fsrc1	%f8, %f40
751	add	%o1, VIS_BLOCKSIZE, %o1
752	fsrc1	%f10, %f42
753	fsrc1	%f12, %f44
754	fsrc1	%f14, %f46
755	stda	%f32, [%o1] %asi
756	membar	#Sync
757
758	retl
759	 wr	%g0, 0, %fprs
760END(zeus_block_copy)
761
762/*
763 * void spitfire_block_zero(void *dst, size_t len)
764 * void zeus_block_zero(void *dst, size_t len)
765 */
766ALTENTRY(zeus_block_zero)
767ENTRY(spitfire_block_zero)
768	rdpr	%pstate, %o3
769	wrpr	%g0, PSTATE_NORMAL, %pstate
770
771	wr	%g0, ASI_BLK_S, %asi
772	wr	%g0, FPRS_FEF, %fprs
773
774	sub	PCB_REG, TF_SIZEOF, %o4
775	ldx	[%o4 + TF_FPRS], %o5
776	andcc	%o5, FPRS_FEF, %g0
777	bz,a,pt	%xcc, 1f
778	 nop
779	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
780	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
781	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
782	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
783	membar	#Sync
784
785	andn	%o5, FPRS_FEF, %o5
786	stx	%o5, [%o4 + TF_FPRS]
787	ldx	[PCB_REG + PCB_FLAGS], %o4
788	or	%o4, PCB_FEF, %o4
789	stx	%o4, [PCB_REG + PCB_FLAGS]
790
7911:	wrpr	%o3, 0, %pstate
792
793	fzero	%f0
794	fzero	%f2
795	fzero	%f4
796	fzero	%f6
797	fzero	%f8
798	fzero	%f10
799	fzero	%f12
800	fzero	%f14
801
8021:	stda	%f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
803	stda	%f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
804	stda	%f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
805	stda	%f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
806	sub	%o1, (4 * VIS_BLOCKSIZE), %o1
807	brnz,pt	%o1, 1b
808	 add	%o0, (4 * VIS_BLOCKSIZE), %o0
809	membar	#Sync
810
811	retl
812	 wr	%g0, 0, %fprs
813END(spitfire_block_zero)
814
	! End of the FPU block copy/zero fault-recoverable region.
	.globl	fpu_fault_end
fpu_fault_end:
	nop

	! Size of the region, for use by the trap/recovery code.
	.globl	fpu_fault_size
	.set	fpu_fault_size, fpu_fault_end - fpu_fault_begin
821
/*
 * longjmp(env, val): unwind register windows back to the frame saved by
 * setjmp() and return "val" there (or 1 if val == 0, since longjmp must
 * never cause setjmp to return 0).  Panics if the jmp_buf does not match
 * a frame on the current stack.
 *
 * %g1 = env, %g2 = saved %fp, %g3 = return value.
 */
ENTRY(longjmp)
	set	1, %g3			! default return value
	movrnz	%o1, %o1, %g3		! use val if it is non-zero
	mov	%o0, %g1
	ldx	[%g1 + _JB_FP], %g2
1:	cmp	%fp, %g2
	bl,a,pt	%xcc, 1b		! pop windows until %fp reaches
	 restore			! the saved frame pointer
	bne,pn	%xcc, 2f		! overshot the saved frame: botch
	 ldx	[%g1 + _JB_SP], %o2
	cmp	%o2, %sp
	blt,pn	%xcc, 2f		! saved %sp below current: botch
	 movge	%xcc, %o2, %sp		! restore the saved stack pointer
	ldx	[%g1 + _JB_PC], %o7
	retl				! "return" to the saved pc
	 mov	%g3, %o0
2:	PANIC("longjmp botch", %l1)
END(longjmp)
840
/*
 * setjmp(env): record %sp, the return address (%o7) and %fp in the
 * jmp_buf and return 0.  longjmp() later resumes from this state.
 */
ENTRY(setjmp)
	stx	%sp, [%o0 + _JB_SP]
	stx	%o7, [%o0 + _JB_PC]
	stx	%fp, [%o0 + _JB_FP]
	retl
	 clr	%o0			! direct return is always 0
END(setjmp)
848
849/*
850 * void ofw_entry(cell_t args[])
851 */
852ENTRY(ofw_entry)
853	save	%sp, -CCFSZ, %sp
854	SET(ofw_vec, %l7, %l6)
855	ldx	[%l6], %l6
856	rdpr	%pstate, %l7
857	andn	%l7, PSTATE_AM | PSTATE_IE, %l5
858	wrpr	%l5, 0, %pstate
859	SET(tba_taken_over, %l5, %l4)
860	brz,pn	%l4, 1f
861	 rdpr	%wstate, %l5
862	andn	%l5, WSTATE_PROM_MASK, %l3
863	wrpr	%l3, WSTATE_PROM_KMIX, %wstate
8641:	call	%l6
865	 mov	%i0, %o0
866	brz,pn	%l4, 1f
867	 nop
868	wrpr	%g0, %l5, %wstate
8691:	wrpr	%l7, 0, %pstate
870	ret
871	 restore %o0, %g0, %o0
872END(ofw_entry)
873
874/*
875 * void ofw_exit(cell_t args[])
876 */
877ENTRY(ofw_exit)
878	save	%sp, -CCFSZ, %sp
879	flushw
880	SET(ofw_tba, %l7, %l5)
881	ldx	[%l5], %l5
882	rdpr	%pstate, %l7
883	andn	%l7, PSTATE_AM | PSTATE_IE, %l7
884	wrpr	%l7, 0, %pstate
885	rdpr	%wstate, %l7
886	andn	%l7, WSTATE_PROM_MASK, %l7
887	wrpr	%l7, WSTATE_PROM_KMIX, %wstate
888	wrpr	%l5, 0, %tba			! restore the OFW trap table
889	SET(ofw_vec, %l7, %l6)
890	ldx	[%l6], %l6
891	SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
892	sub	%l0, SPOFF, %fp			! setup a stack in a locked page
893	sub	%l0, SPOFF + CCFSZ, %sp
894	mov	AA_DMMU_PCXR, %l3		! force primary DMMU context 0
895	sethi	%hi(KERNBASE), %l5
896	stxa	%g0, [%l3] ASI_DMMU
897	flush	%l5
898	wrpr	%g0, 0, %tl			! force trap level 0
899	call	%l6
900	 mov	%i0, %o0
901	! never to return
902END(ofw_exit)
903
#ifdef GPROF

/*
 * Profiling boundary labels: dummy entries whose addresses delimit
 * regions of the kernel text for the profiler.
 */
ENTRY(user)
	nop

ENTRY(btrap)
	nop

ENTRY(etrap)
	nop

ENTRY(bintr)
	nop

ENTRY(eintr)
	nop

/*
 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
 * badness.
 */
#define	GM_STATE	0x0
#define	GMON_PROF_OFF	3
#define	GMON_PROF_HIRES	4

	! gcc emits calls to __cyg_profile_func_enter; alias _mcount to it.
	.globl	_mcount
	.set	_mcount, __cyg_profile_func_enter

/*
 * Profiling hook: tail-call mcount() unless profiling is switched off
 * in _gmonparam.state.
 */
ENTRY(__cyg_profile_func_enter)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_OFF
	be,a,pn %icc, 1f		! profiling off: do nothing
	 nop
	SET(mcount, %o3, %o2)
	jmpl	%o2, %g0		! tail-call mcount()
	 nop
1:	retl
	 nop
END(__cyg_profile_func_enter)

#ifdef GUPROF

/*
 * Function-exit hook: tail-call mexitcount() only in high-resolution
 * profiling mode.
 */
ENTRY(__cyg_profile_func_exit)
	SET(_gmonparam, %o3, %o2)
	lduw	[%o2 + GM_STATE], %o3
	cmp	%o3, GMON_PROF_HIRES
	be,a,pn %icc, 1f		! only act in HIRES mode
	 nop
	SET(mexitcount, %o3, %o2)
	jmpl	%o2, %g0		! tail-call mexitcount()
	 nop
1:	retl
	 nop
END(__cyg_profile_func_exit)

#endif /* GUPROF */

#endif /* GPROF */
963