1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#ifndef	_SYS_FPRAS_IMPL_H
28#define	_SYS_FPRAS_IMPL_H
29
30#pragma ident	"%Z%%M%	%I%	%E% SMI"
31
32#include <sys/fpras.h>
33
34#if !defined(_ASM)
35#include <sys/types.h>
36#else
37#include <sys/intreg.h>
38#include <sys/errno.h>
39#endif	/* _ASM */
40
41#ifdef	__cplusplus
42extern "C" {
43#endif
44
45/*
46 * sun4u/cheetah fpRAS implementation.  Arrays etc will be allocated in sun4u
47 * post_startup() if fpras_implemented is set.  This file may belong at
48 * the cpu level (eg, cheetahregs.h) but most of it should be common
49 * when fpRAS support is added for additional cpu types so we introduce
50 * it at the sun4u level (and set fpras_implemented in cpu_setup).
51 *
52 * If fpRAS is implemented on a sun4u/cpu combination that does not use
53 * an ASR for %stick then the FPRAS_INTERVAL macro will need some
54 * modification.
55 */
56
/*
 * Upper bound for check frequency per cpu and per operation.  For example, if
 * this is 100 then for cpuid N performing a bcopy, if that cpu has not
 * performed a checked bcopy in the last 1/100th of a second then
 * we'll check the current operation.  A value of 0 will check every operation.
 * Modifying fpras_frequency from its default is not recommended.
 * fpras_interval is computed from fpras_frequency.
 *
 * fpras_interval is the value the FPRAS_INTERVAL asm macro loads and
 * compares (unsigned) against the time elapsed since the last check;
 * a negative value is treated by that macro as "not initialized" and
 * suppresses checking altogether.
 */
#if !defined(_ASM)
extern int fpras_frequency;	/* max checks per second, per cpu & op */
extern int64_t fpras_interval;	/* min gap between checks; < 0 => unset */
#endif	/* !_ASM */
#define	FPRAS_DEFAULT_FREQUENCY	100
70
71#if !defined(_ASM)
72
/*
 * Structure of a check function.  The preamble prepares registers for the
 * upcoming calculation that is performed in blk0 and blk1.  One of those
 * blocks will be rewritten as part of an FPRAS_REWRITE operation.  Finally
 * the result checked in chkresult should be as predetermined, and we should
 * return zero on success and nonzero on failure.  If an illegal instruction
 * is encountered in the execution of the check function then we trampoline
 * to the final three instructions to return a different value.
 *
 * Note that the size of this structure is a power of 2 as is the
 * size of a struct fpras_chkfngrp.  The asm macros below rely on this
 * in performing bit shifts instead of mulx.
 *
 * Layout: 16 + 16 + 16 + 13 + 3 = 64 instruction words = 256 bytes.
 * Each of the first three members is 16 words (64 bytes), which is the
 * stride the asm macros use when they address a block as
 * FPRAS_BLK0 + blk * 64.  Do not reorder or resize members without
 * updating the offsets and shift constants the asm macros use
 * (FPRAS_BLK0, FPRAS_CHKFN_SIZE_SHIFT - defined outside this file).
 */
struct fpras_chkfn {
	uint32_t	fpras_preamble[16];	/* register setup */
	uint32_t	fpras_blk0[16];		/* calculation, rewritable */
	uint32_t	fpras_blk1[16];		/* calculation, rewritable */
	uint32_t	fpras_chkresult[13];	/* verify result, return 0/!0 */
	uint32_t	fpras_trampoline[3];	/* illegal instruction return */
};
93
/*
 * Check function constructed to match a struct fpras_chkfn.  This is the
 * master copy: the REWRITE asm macros in this file read instruction blocks
 * from it (at FPRAS_BLK0 + blk * 64) and store them into the per-cpu copy
 * that is actually executed.
 */
extern int fpras_chkfn_type1(void);
98
99/*
100 * A group of check functions, one for each operation type.  These will
101 * be the check functions for copy operations on a particular processor.
102 */
103struct fpras_chkfngrp {
104	struct fpras_chkfn fpras_fn[FPRAS_NCOPYOPS];
105};
106
/*
 * Where we store check functions for execution.  Indexed by cpuid and
 * function within that for cacheline friendliness.  Startup code
 * copies the check function into this array.  The fpRAS mechanism will
 * rewrite one of fpras_blk0 or fpras_blk1 before calling the check function
 * for a cpuid & copy function combination.
 *
 * The FPRAS_INTERVAL asm macro computes the address of a check function as
 *	fpras_chkfngrps + (cpuid << FPRAS_CHKFNGRP_SIZE_SHIFT)
 *	    + (operation << FPRAS_CHKFN_SIZE_SHIFT)
 */
extern struct fpras_chkfngrp *fpras_chkfngrps;
115
116#endif	/* !_ASM */
117
118#if defined(_ASM)
119
120/* BEGIN CSTYLED */
121
/*
 * The INTERVAL macro decides whether we will check this copy operation,
 * based on performing no more than 1 check per cpu & operation in a specified
 * time interval.  If it decides to skip this check (ie, we have checked
 * recently, checking is not yet initialized, or checking is disabled for
 * this operation) then it returns NULL in doex, otherwise doex is the
 * address of the check function to execute later.  Migration must have been
 * prevented before calling this macro.  Args:
 *
 *	operation (immediate): one of FPRAS_BCOPY etc
 *	blk (immediate): which block will be copied; used here only to
 *		prefetch that block of fpras_chkfn_type1
 *	doex (register): register in which to return check function address
 *	tmp1 (register): used for scratch, not preserved
 *	tmp2 (register): used for scratch, not preserved
 *	tmp3 (register): used for scratch, not preserved
 *	tmp4 (register): used for scratch, not preserved
 *	label: free local numeric label
 *
 * The integer condition codes are modified (btst, cmp).  The order of
 * instructions matters: several instructions sit in branch delay slots.
 */

#define	FPRAS_INTERVAL(operation, blk, doex, tmp1, tmp2, tmp3, tmp4, label) \
	sethi	%hi(fpras_interval), tmp1				;\
	ldx	[tmp1 + %lo(fpras_interval)], tmp1 /* tmp1 = interval */ ;\
	brlz,pn	tmp1, label/**/f	/* not initialized? */		;\
	  clr	doex			/* delay slot: default NULL */	;\
	sethi	%hi(fpras_disableids), tmp2				;\
	ld	[tmp2 + %lo(fpras_disableids)], tmp2 /* disable mask */	;\
	mov	0x1, tmp3						;\
	sll	tmp3, operation, tmp3	/* mask bit for this op */	;\
	btst	tmp3, tmp2						;\
	bnz,a,pn %icc, label/**/f	/* disabled for this op? */	;\
	  nop								;\
	set	fpras_chkfn_type1, tmp2					;\
	prefetch [tmp2 + (FPRAS_BLK0 + blk * 64)], #one_read		;\
	ldn	[THREAD_REG + T_CPU], tmp2				;\
	ldn	[tmp2 + CPU_PRIVATE], tmp2				;\
	brz,pn	tmp2, label/**/f	/* early in startup? */		;\
	  mov	operation, tmp3						;\
	sll	tmp3, 3, tmp3		/* 8 bytes per timestamp */	;\
	set	CHPR_FPRAS_TIMESTAMP, tmp4				;\
	add	tmp2, tmp4, tmp2					;\
	add	tmp2, tmp3, tmp2	/* keep ptr for update */	;\
	ldx	[tmp2], tmp3		/* last timestamp */		;\
	rd	STICK, doex		/* doex is a scratch here */	;\
	sub	doex, tmp3, tmp4	/* delta since last check */	;\
	cmp	tmp4, tmp1		/* compare delta to interval */	;\
	blu,a,pn %xcc, label/**/f	/* checked too recently? */	;\
	  clr	doex			/* annulled unless taken */	;\
	stx	doex, [tmp2]		/* updated timestamp */		;\
	ldn	[THREAD_REG + T_CPU], tmp1				;\
	ld	[tmp1 + CPU_ID], tmp1	/* tmp1 = cpuid */		;\
	sethi	%hi(fpras_chkfngrps), doex				;\
	ldn	[doex + %lo(fpras_chkfngrps)], doex			;\
	sll	tmp1, FPRAS_CHKFNGRP_SIZE_SHIFT, tmp1			;\
	add	doex, tmp1, doex	/* group for this cpu */	;\
	mov	operation, tmp1						;\
	sll	tmp1, FPRAS_CHKFN_SIZE_SHIFT, tmp1			;\
	add	doex, tmp1, doex	/* address of check function */	;\
label:
179
/*
 * The REWRITE macro copies an instruction block from fpras_chkfn_type1
 * into a per-cpu fpras check function.
 * If doex is NULL it must not attempt any copy, and must leave doex NULL.
 * CPU migration of this thread must be prevented before we call this macro.
 * We must have checked for fp in use (and saved state, including the
 * quadrant of registers indicated by the fpq argument) and enabled fp
 * before using this macro.  Args:
 *
 *	blk (immediate): as above
 *	doex (register): register in which to return check function addr
 *	[fpq (fp register): frf quadrant to be used (%f0/%f16/%f32/%f48)]
 *		This is used on type 1 rewrite only - on others the
 *		quadrant is implicit/hardcoded in the macro name.
 *	tmp1 (register): used for scratch, not preserved
 *	label1: free local numeric label
 *	[label2: free local numeric label]
 *		This is used in type 2 only.
 *
 * Note that the REWRITE macros do not perform a flush instruction -
 * flush is not necessary on Cheetah derivative processors in which
 * i$ snoops for invalidations.
 */
203
/*
 * Rewrite type 1 will work with any instruction pattern - it just block
 * loads and block stores the given block.  A membar after block store
 * forces the block store to complete before upcoming reuse of the
 * fpregs in the block;  the block load is blocking on sun4u/cheetah
 * so no need for a membar after it.
 *
 * Note that tmp1 is modified even when doex is NULL, since the sethi
 * sits in the (non-annulled) delay slot of the initial branch.
 */

#define	FPRAS_REWRITE_TYPE1(blk, doex, fpq, tmp1, label)	\
	brz,pn  doex, label/**/f	/* no check wanted? */	;\
	  sethi	%hi(fpras_chkfn_type1), tmp1			;\
	add	tmp1, %lo(fpras_chkfn_type1), tmp1		;\
	add	tmp1, FPRAS_BLK0 + blk * 64, tmp1 /* source */	;\
	ldda	[tmp1]ASI_BLK_P, fpq				;\
	add	doex, FPRAS_BLK0 + blk * 64, tmp1 /* dest */	;\
	stda	fpq, [tmp1]ASI_BLK_P				;\
	membar	#Sync						;\
label:
222
/*
 * Rewrite type 2 will only work with instruction blocks that satisfy
 * this particular repeat pattern.  Note that the frf quadrant to
 * use is implicit in the macro name and had better match what the
 * copy function is preserving.
 *
 * The pattern: only the first two doublewords (16 bytes) of the source
 * block are loaded; the 64 bytes stored are those 16 bytes repeated
 * four times, so the source block must have that form.
 *
 * The odd looking repetition in the initial loop is designed to open
 * up both paths from prefetch cache to the frf - unrolling the loop
 * would defeat this.  In addition we perform idempotent faligndata
 * manipulations using %tick as a randomly aligned address (this only
 * works for addresses that aren't doubleword aligned; when the low
 * three bits of %tick are zero the extra manipulation is skipped).
 *
 * label2 is defined twice within the macro (and referenced once
 * backwards and once forwards), so it must be a numeric local label.
 * tmp1 is modified even when doex is NULL.  This variant uses
 * %f0 - %f14 and modifies %gsr and the integer condition codes.
 */
#define	FPRAS_REWRITE_TYPE2Q1(blk, doex, tmp1, tmp2, label1, label2)	\
	brz,pn	doex, label1/**/f	/* no check wanted? */		;\
	  mov	0x2, tmp1		/* two passes of the loop */	;\
	set	fpras_chkfn_type1, tmp2					;\
label2:									;\
	deccc		tmp1						;\
	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f4		;\
	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f2	;\
	bnz,a,pt	%icc, label2/**/b				;\
	  fsrc1		%f4, %f0	/* annulled on loop exit */	;\
	rdpr		%tick, tmp1					;\
	fsrc1		%f4, %f8					;\
	fsrc1		%f2, %f10					;\
	btst		0x7, tmp1	/* doubleword aligned? */	;\
	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
	bz,pn		%icc, label2/**/f				;\
	  faligndata	%f2, %f4, %f6					;\
	faligndata	%f0, %f2, %f12					;\
	alignaddrl	tmp1, %g0, %g0					;\
	faligndata	%f12, %f6, %f6	/* %f6 = %f2 again */		;\
label2:									;\
	add		doex, FPRAS_BLK0 + blk * 64, tmp1		;\
	fsrc2		%f8, %f12					;\
	fsrc1		%f6, %f14					;\
	stda		%f0, [tmp1]ASI_BLK_P				;\
	membar		#Sync						;\
label1:
262
/*
 * Rewrite type 2 using the second frf quadrant: the same instruction
 * sequence as FPRAS_REWRITE_TYPE2Q1 but operating on %f16 - %f30
 * instead of %f0 - %f14.  Only the first two doublewords of the source
 * block are loaded and they are stored repeated four times, so the
 * source block must consist of that 16 byte pattern repeated.
 *
 * label2 is defined twice within the macro, so it must be a numeric
 * local label.  tmp1 is modified even when doex is NULL; %gsr and the
 * integer condition codes are modified.
 */
#define	FPRAS_REWRITE_TYPE2Q2(blk, doex, tmp1, tmp2, label1, label2)	\
	brz,pn	doex, label1/**/f	/* no check wanted? */		;\
	  mov	0x2, tmp1		/* two passes of the loop */	;\
	set	fpras_chkfn_type1, tmp2					;\
label2:									;\
	deccc		tmp1						;\
	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f20		;\
	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f18	;\
	bnz,a,pt	%icc, label2/**/b				;\
	  fsrc1		%f20, %f16	/* annulled on loop exit */	;\
	rdpr		%tick, tmp1					;\
	fsrc1		%f20, %f24					;\
	fsrc1		%f18, %f26					;\
	btst		0x7, tmp1	/* doubleword aligned? */	;\
	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
	bz,pn		%icc, label2/**/f				;\
	  faligndata	%f18, %f20, %f22				;\
	faligndata	%f16, %f18, %f28				;\
	alignaddrl	tmp1, %g0, %g0					;\
	faligndata	%f28, %f22, %f22 /* %f22 = %f18 again */	;\
label2:									;\
	add		doex, FPRAS_BLK0 + blk * 64, tmp1		;\
	fsrc2		%f24, %f28					;\
	fsrc1		%f22, %f30					;\
	stda		%f16, [tmp1]ASI_BLK_P				;\
	membar		#Sync						;\
label1:
290
/*
 * The CHECK macro takes the 'doex' address of the check function to
 * execute and jumps to it (if not NULL). If the check function returns
 * a value other than FPRAS_OK then the check has failed and the CHECK
 * macro must initiate an appropriate failure action.  Illegal instruction
 * trap handlers will also recognise traps in this PC range as fp failures.
 * Thread migration must only be reallowed after completion of this check.
 * The CHECK macro should be treated as a CALL/JMPL - output registers are
 * forfeit after using it.  If the call to fpras_failure returns
 * (it may decide to panic) then invoke lofault handler (which must exist)
 * to return an error (be sure to use this macro before restoring original
 * lofault setup in copy functions).  Note that the lofault handler is the
 * copyops aware proxy handler which will perform other tidy up operations
 * (unbind, fp state restore) that would normally have been done in the tail
 * of the copy function.
 *
 *	operation (immediate): as above
 *	doex (register): doex value returned from the REWRITE
 *	label: free local numeric label
 *
 * On the failure path %g1 is set to EFAULT before jumping to the
 * lofault handler, and doex is reused to hold the handler address.
 */

#define	FPRAS_CHECK(operation, doex, label)				\
	brz,pn	doex, label/**/f	/* no check wanted? */		;\
	  nop								;\
	jmpl	doex, %o7		/* run the check function */	;\
	  nop								;\
	cmp	%o0, FPRAS_OK						;\
	be	%icc, label/**/f	/* check passed */		;\
	  nop								;\
	mov	%o0, %o1	/* how detected */			;\
	call	fpras_failure	/* take failure action */		;\
	  mov	operation, %o0						;\
	ldn	[THREAD_REG + T_LOFAULT], doex				;\
	jmp	doex		/* returned: go to lofault handler */	;\
	  mov	EFAULT, %g1						;\
label:
327
328/* END CSTYLED */
329
330#endif	/* _ASM */
331
332#ifdef	__cplusplus
333}
334#endif
335
336#endif	/* _SYS_FPRAS_IMPL_H */
337