1/* $NetBSD: fp_complete.c,v 1.13 2011/06/07 00:48:30 matt Exp $ */
2
3/*-
4 * Copyright (c) 2001 Ross Harvey
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the NetBSD
18 *	Foundation, Inc. and its contributors.
19 * 4. Neither the name of The NetBSD Foundation nor the names of its
20 *    contributors may be used to endorse or promote products derived
21 *    from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
37
38__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.13 2011/06/07 00:48:30 matt Exp $");
39
40#include "opt_compat_osf1.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/proc.h>
45#include <sys/atomic.h>
46#include <sys/evcnt.h>
47
48#ifdef COMPAT_OSF1
49#include <compat/osf1/osf1_exec.h>
50#endif
51
52#include <machine/cpu.h>
53#include <machine/fpu.h>
54#include <machine/reg.h>
55#include <machine/alpha.h>
56#include <alpha/alpha/db_instruction.h>
57
58#include <lib/libkern/softfloat.h>
59
#define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */

/*	Set Name		Opcodes			AARM C.* Symbols  */

#define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM]	  */
#define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC		  */
#define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL		  */\
				 1UL << 0x19 |		/* \PAL\	  */\
				 1UL << 0x1a |		/* JSR		  */\
				 1UL << 0x1b |		/* \PAL\	  */\
				 1UL << 0x1d |		/* \PAL\	  */\
				 1UL << 0x1e |		/* \PAL\	  */\
				 1UL << 0x1f |		/* \PAL\	  */\
				 0xffffUL << 0x30 | 	/* branch ops	  */\
				 CHECKFUNCTIONCODE)

/* Assemble an IEEE bit pattern of the given width from its fields. */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
	(u_int ## width ## _t)(sign) << ((width) - 1)			|\
	(u_int ## width ## _t)(exp)  << ((width) - 1 - (expwidth))	|\
	(u_int ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)	|\
	(u_int ## width ## _t)(rest_of_frac)

/* Quiet NaN patterns for single and double precision. */
#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)

#define IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/*
 * The two filter macros below deliberately end with a dangling "else":
 * the caller's trailing semicolon becomes the empty else body, so an
 * invocation reads like one ordinary statement.  PREFILTER flushes
 * subnormal inputs to zero when IEEE_MAP_DMZ is set; POSTFILTER does
 * the same for subnormal results under IEEE_MAP_UMZ.
 */
#define	PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ	\
				     && IS_SUBNORMAL(v))		\
					 (v)->frac = 0; else

#define	POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ	\
				      && IS_SUBNORMAL(v))		\
					  (v)->frac = 0; else

	/* Alpha returns 2.0 for true, all zeroes for false. */

#define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)

	/* Move bits from sw fp_c to hw fpcr. */

#define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
103
/*
 * FP usage event counters: first use of FP by an lwp vs. a re-use trap
 * after FP was already used (bumped in fpu_state_load).
 */
struct evcnt fpevent_use;
struct evcnt fpevent_reuse;

/*
 * Temporary trap shadow instrumentation. The [un]resolved counters
 * could be kept permanently, as they provide information on whether
 * user code has met AARM trap shadow generation requirements.
 */

struct alpha_shadow {
	uint64_t resolved;	/* cases trigger pc found */
	uint64_t unresolved;	/* cases it wasn't, code problems? */
	uint64_t scans;		/* trap shadow scans */
	uint64_t len;		/* number of instructions examined */
	uint64_t uop;		/* bit mask of unexpected opcodes */
	uint64_t sqrts;	/* ev6+ square root single count */
	uint64_t sqrtt;	/* ev6+ square root double count */
	uint32_t ufunc;	/* bit mask of unexpected functions */
	uint32_t max;		/* max trap shadow scan */
	uint32_t nilswop;	/* unexpected op codes */
	uint32_t nilswfunc;	/* unexpected function codes */
	uint32_t nilanyop;	/* this "cannot happen" */
	uint32_t vax;		/* sigs from vax fp opcodes */
} alpha_shadow, alpha_shadow_zero;	/* _zero: all-zero image for resets */
128
static float64 float64_unk(float64, float64);
static float64 compare_un(float64, float64);
static float64 compare_eq(float64, float64);
static float64 compare_lt(float64, float64);
static float64 compare_le(float64, float64);
static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
static void cvt_gd(uint32_t, struct lwp *);
static void cvt_qt_dg_qg(uint32_t, struct lwp *);
static void cvt_tq_gq(uint32_t, struct lwp *);

/* Single-precision dispatch, indexed by inst.float_detail.opclass (0-3). */
static float32 (*swfp_s[])(float32, float32) = {
	float32_add, float32_sub, float32_mul, float32_div,
};

/* Double-precision dispatch, indexed by opclass (0-11; 8-11 reserved). */
static float64 (*swfp_t[])(float64, float64) = {
	float64_add, float64_sub, float64_mul, float64_div,
	compare_un,    compare_eq,    compare_lt,    compare_le,
	float64_unk, float64_unk, float64_unk, float64_unk
};

/* Conversion dispatch, indexed by opclass - 12 (opclasses 12-15). */
static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
};
152
153static void
154this_cannot_happen(int what_cannot_happen, int64_t bits)
155{
156	static int total;
157	alpha_instruction inst;
158	static uint64_t reported;
159
160	inst.bits = bits;
161	++alpha_shadow.nilswfunc;
162	if (bits != -1)
163		alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
164	if (1UL << what_cannot_happen & reported)
165		return;
166	reported |= 1UL << what_cannot_happen;
167	if (total >= 1000)
168		return;	/* right now, this return "cannot happen" */
169	++total;
170	if (bits)
171		printf("FP instruction %x\n", (unsigned int)bits);
172	printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
173	    alpha_shadow.uop);
174	printf("Please report this to port-alpha-maintainer@NetBSD.org\n");
175}
176
/*
 * Read FP register rn as a single (S-format) value into *v, flushing a
 * subnormal input to zero first if the lwp has IEEE_MAP_DMZ set, so the
 * software emulation never sees a denormal operand.
 */
static inline void
sts(unsigned int rn, s_float *v, struct lwp *l)
{
	alpha_sts(rn, v);
	PREFILTER_SUBNORMAL(l, v);
}
183
/*
 * Read FP register rn as a double (T-format) value into *v, applying
 * the same denormal-input-to-zero (DMZ) prefilter as sts().
 */
static inline void
stt(unsigned int rn, t_float *v, struct lwp *l)
{
	alpha_stt(rn, v);
	PREFILTER_SUBNORMAL(l, v);
}
190
/*
 * Write the single (S-format) value *v to FP register rn, first
 * flushing a subnormal result to zero if the lwp has IEEE_MAP_UMZ set.
 */
static inline void
lds(unsigned int rn, s_float *v, struct lwp *l)
{
	POSTFILTER_SUBNORMAL(l, v);
	alpha_lds(rn, v);
}
197
/*
 * Write the double (T-format) value *v to FP register rn, applying the
 * same underflow-to-zero (UMZ) postfilter as lds().
 */
static inline void
ldt(unsigned int rn, t_float *v, struct lwp *l)
{
	POSTFILTER_SUBNORMAL(l, v);
	alpha_ldt(rn, v);
}
204
205static float64
206compare_lt(float64 a, float64 b)
207{
208	return CMP_RESULT(float64_lt(a, b));
209}
210
211static float64
212compare_le(float64 a, float64 b)
213{
214	return CMP_RESULT(float64_le(a, b));
215}
216
217static float64
218compare_un(float64 a, float64 b)
219{
220	if (float64_is_nan(a) | float64_is_nan(b)) {
221		if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
222			float_set_invalid();
223		return CMP_RESULT(1);
224	}
225	return CMP_RESULT(0);
226}
227
228static float64
229compare_eq(float64 a, float64 b)
230{
231	return CMP_RESULT(float64_eq(a, b));
232}
233/*
234 * A note regarding the VAX FP ops.
235 *
236 * The AARM gives us complete leeway to set or not set status flags on VAX
237 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
238 * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
239 * For the purely vax ones, it's hard to imagine ever running them.
240 * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
241 * to panic, assert, or print unlimited output based on a path through the
242 * decoder, so weird cases don't become security issues.
243 */
/*
 * Emulate the cvtX-to-S/F opclass (cvtqs, cvtts, cvtst, cvtgf, cvtqf).
 * Only the IEEE cases reachable here are computed; unexpected source
 * encodings deliver a quiet NaN rather than trapping.
 */
static void
cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	s_float sfb, sfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	/*
	 * cvtst and cvtts have the same opcode, function, and source.  The
	 * distinction for cvtst is hidden in the illegal modifier combinations.
	 * We decode even the non-/s modifier, so that the fix-up-always mode
	 * works on ev6 and later. The rounding bits are unused and fixed for
	 * cvtst, so we check those too.
	 */
	switch(inst.float_format.function) {
	case op_cvtst:
	case op_cvtst_u:
		/* cvtst: widen S to T. */
		sts(inst.float_detail.fb, &sfb, l);
		tfc.i = float32_to_float64(sfb.i);
		ldt(inst.float_detail.fc, &tfc, l);
		return;
	}
	if(inst.float_detail.src == 2) {
		/* cvtts: narrow T to S. */
		stt(inst.float_detail.fb, &tfb, l);
		sfc.i = float64_to_float32(tfb.i);
		lds(inst.float_detail.fc, &sfc, l);
		return;
	}
	/* 0: S/F */
	/* 1:  /D */
	/* 3: Q/Q */
	/* Any other source type is unexpected; record it, emit a QNaN. */
	this_cannot_happen(5, inst.generic_format.opcode);
	tfc.i = FLOAT64QNAN;
	ldt(inst.float_detail.fc, &tfc, l);
	return;
}
281
/*
 * Emulate cvtXd (VAX D destination).  The narrowing conversion is run
 * purely for its exception side effects (its value is discarded), the
 * narrowing's inexact flag is then withdrawn from the FP_C, and the
 * delivered result is regenerated by adding zero — presumably to pick
 * up rounding/flags from the add path; TODO confirm against AARM.
 */
static void
cvt_gd(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, l);
	/* side effects only: raise the flags a narrowing would raise */
	(void) float64_to_float32(tfb.i);
	/* the narrowing's inexact flag does not apply; take it back */
	l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
	tfc.i = float64_add(tfb.i, (float64)0);
	ldt(inst.float_detail.fc, &tfc, l);
}
295
/*
 * Emulate the cvtX-to-T/G opclass (cvtqt, cvt[dq]g).  Only the Q
 * source (cvtqt) is actually computed; the VAX cases deliberately
 * produce zero without trapping.
 */
static void
cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	switch(inst.float_detail.src) {	/* 2-bit field: all cases covered */
	case 0:	/* S/F */
		this_cannot_happen(3, inst.bits);
		/* fall thru */
	case 1: /* D */
		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
		/* We've done what's important by just not trapping */
		tfc.i = 0;
		break;
	case 2: /* T/G */
		this_cannot_happen(4, inst.bits);
		tfc.i = 0;
		break;
	case 3:	/* Q/Q */
		/* cvtqt: 64-bit integer to T floating */
		stt(inst.float_detail.fb, &tfb, l);
		tfc.i = int64_to_float64(tfb.i);
		break;
	}
	/* raw store — note this bypasses the UMZ postfilter of ldt() */
	alpha_ldt(inst.float_detail.fc, &tfc);
}
323/*
324 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
325 *      unfortunate habit of always returning the nontrapping result.
326 * XXX: there are several apparent AARM/AAH disagreements, as well as
327 *      the issue of trap handler pc and trapping results.
328 */
/*
 * Emulate cvtXq (cvttq, cvtgq): floating to 64-bit integer.  The
 * integer result lives in the FP register file, hence the raw 64-bit
 * FP store at the end.
 */
static void
cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, l);
	tfc.i = float64_to_int64(tfb.i);
	alpha_ldt(inst.float_detail.fc, &tfc);	/* yes, ldt */
}
340
/*
 * Build a hardware FPCR image from the software FP_C word fp_c.  Only
 * the dynamic rounding mode bits of the incoming fpcr are preserved;
 * every other bit is regenerated from fp_c.  Returns the new FPCR
 * value (the caller writes it to the hardware).
 */
static uint64_t
fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
{
	uint64_t disables;

	/*
	 * It's hard to arrange for conforming bit fields, because the FP_C
	 * and the FPCR are both architected, with specified (and relatively
	 * scrambled) bit numbers. Defining an internal unscrambled FP_C
	 * wouldn't help much, because every user exception requires the
	 * architected bit order in the sigcontext.
	 *
	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
	 * will lose, because those bits can be and usually are subsetted;
	 * the official home is in the fp_c. Furthermore, the kernel puts
	 * phony enables (it lies :-) in the fpcr in order to get control when
	 * it is necessary to initially set a sticky bit.
	 */

	/* keep only the dynamic rounding mode from the old FPCR */
	fpcr &= FPCR_DYN(3);

	/*
	 * enable traps = case where flag bit is clear OR program wants a trap
	 * enables = ~flags | mask
	 * disables = ~(~flags | mask)
	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
	 */
	disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);

	/* scatter the disable bits to their architected FPCR positions */
	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);

#	if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&		\
	    FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&	\
	    FP_X_UFL << (61 - 3) == FPCR_UNFD &&			\
	    FP_X_IMP << (61 - 3) == FPCR_INED &&			\
	    FP_X_OFL << (49 - 0) == FPCR_OVFD)
#		error "Assertion failed"
	/*
	 * We don't care about the other built-in bit numbers because they
	 * have been architecturally specified.
	 */
#	endif

	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
	if (fp_c & FP_C_MIRRORED)
		fpcr |= FPCR_SUM;
	if (fp_c & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;
	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
	return fpcr;
}
394
395static void
396fp_c_to_fpcr(struct lwp *l)
397{
398	alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
399}
400
401void
402alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
403{
404	uint64_t md_flags;
405
406	fp_c &= MDLWP_FP_C;
407	md_flags = l->l_md.md_flags;
408	if ((md_flags & MDLWP_FP_C) == fp_c)
409		return;
410	l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
411	fpu_load();
412	alpha_pal_wrfen(1);
413	fp_c_to_fpcr(l);
414	alpha_pal_wrfen(0);
415}
416
417uint64_t
418alpha_read_fp_c(struct lwp *l)
419{
420	/*
421	 * A possibly-desireable EV6-specific optimization would deviate from
422	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
423	 * but in a transparent way. Some of the code for that would need to
424	 * go right here.
425	 */
426	return l->l_md.md_flags & MDLWP_FP_C;
427}
428
429static float64
430float64_unk(float64 a, float64 b)
431{
432	return 0;
433}
434
435/*
436 * The real function field encodings for IEEE and VAX FP instructions.
437 *
438 * Since there is only one operand type field, the cvtXX instructions
439 * require a variety of special cases, and these have to be analyzed as
440 * they don't always fit into the field descriptions in AARM section I.
441 *
442 * Lots of staring at bits in the appendix shows what's really going on.
443 *
444 *	   |	       |
445 * 15 14 13|12 11 10 09|08 07 06 05
446 * --------======------============
447 *  TRAP   : RND : SRC : FUNCTION  :
448 *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
449 *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
 *	   |				 /V overflow enable (if int output)
451 *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
452 *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
453 *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
454 *  1  0  1|. . .:. . . . . . . . . . . ./SU
455 *	   |				 /SV
456 *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
457 *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)	(IEEE only)
458 *	   |				 /SVI (if int output)   (IEEE only)
459 *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
460 *	   |  not all combinations are valid.
461 *	   |	       |
462 * 15 14 13|12 11 10 09|08 07 06 05
463 * --------======------============
464 *  TRAP   : RND : SRC : FUNCTION  :
465 *	   | 0	0 . . . . . . . . . . . ./C Chopped
466 *	   : 0	1 . . . . . . . . . . . ./M Minus Infinity
467 *	   | 1	0 . . . . . . . . . . . .   Normal
468 *	   : 1	1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
469 *	   |	       |
470 * 15 14 13|12 11 10 09|08 07 06 05
471 * --------======------============
472 *  TRAP   : RND : SRC : FUNCTION  :
473 *		   0 0. . . . . . . . . . S/F
474 *		   0 1. . . . . . . . . . -/D
475 *		   1 0. . . . . . . . . . T/G
476 *		   1 1. . . . . . . . . . Q/Q
477 *	   |	       |
478 * 15 14 13|12 11 10 09|08 07 06 05
479 * --------======------============
480 *  TRAP   : RND : SRC : FUNCTION  :
481 *			 0  0  0  0 . . . addX
482 *			 0  0  0  1 . . . subX
483 *			 0  0  1  0 . . . mulX
484 *			 0  0  1  1 . . . divX
485 *			 0  1  0  0 . . . cmpXun
486 *			 0  1  0  1 . . . cmpXeq
487 *			 0  1  1  0 . . . cmpXlt
488 *			 0  1  1  1 . . . cmpXle
489 *			 1  0  0  0 . . . reserved
490 *			 1  0  0  1 . . . reserved
491 *			 1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
492 *			 1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
493 *			 1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
494 *			 1  1  0  1 . . . cvtXd   (vax only)
495 *			 1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
496 *			 1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
497 *	   |	       |
498 * 15 14 13|12 11 10 09|08 07 06 05	  the twilight zone
499 * --------======------============
500 *  TRAP   : RND : SRC : FUNCTION  :
501 * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
502 *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
503 *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
504 *  x  0  x  x  x  x  0	 1  1  1  1 . . . cvttq/_ (src == T)
505 */
506
/*
 * Software-interpret the FP instruction word "bits" for lwp l, using
 * the SoftFloat library for the IEEE arithmetic.  Operands come from
 * the hardware FP registers through the st[st]/ld[st] wrappers (which
 * apply the DMZ/UMZ denormal fixups) and exception flags accumulate
 * via float_raise().  pc is currently unused.
 */
static void
alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint64_t bits)
{
	s_float sfa, sfb, sfc;
	t_float tfa, tfb, tfc;
	alpha_instruction inst;

	inst.bits = bits;
	switch(inst.generic_format.opcode) {
	default:
		/* this "cannot happen" */
		this_cannot_happen(2, inst.bits);
		return;
	case op_any_float:
		if (inst.float_format.function == op_cvtql_sv ||
		    inst.float_format.function == op_cvtql_v) {
			/*
			 * cvtql/v, cvtql/sv: deliver a saturated result
			 * keyed to the source's sign, and raise the
			 * invalid-operation exception.
			 */
			alpha_stt(inst.float_detail.fb, &tfb);
			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
			alpha_lds(inst.float_detail.fc, &sfc);
			float_raise(FP_X_INV);
		} else {
			++alpha_shadow.nilanyop;
			this_cannot_happen(3, inst.bits);
		}
		break;
	case op_vax_float:
		++alpha_shadow.vax;	/* fall thru */
	case op_ieee_float:
	case op_fix_float:
		switch(inst.float_detail.src) {
		case op_src_sf:
			/* single-precision (S, VAX F) operations */
			sts(inst.float_detail.fb, &sfb, l);
			if (inst.float_detail.opclass == 10)
				sfc.i = float32_sqrt(sfb.i);
			else if (inst.float_detail.opclass & ~3) {
				/* only opclasses 0-3 and 10 are valid in
				 * single precision; others yield a QNaN */
				this_cannot_happen(1, inst.bits);
				sfc.i = FLOAT32QNAN;
			} else {
				sts(inst.float_detail.fa, &sfa, l);
				sfc.i = (*swfp_s[inst.float_detail.opclass])(
				    sfa.i, sfb.i);
			}
			lds(inst.float_detail.fc, &sfc, l);
			break;
		case op_src_xd:
		case op_src_tg:
			/* double-precision ops and the cvt family */
			if (inst.float_detail.opclass >= 12)
				(*swfp_cvt[inst.float_detail.opclass - 12])(
				    inst.bits, l);
			else {
				stt(inst.float_detail.fb, &tfb, l);
				if (inst.float_detail.opclass == 10)
					tfc.i = float64_sqrt(tfb.i);
				else {
					stt(inst.float_detail.fa, &tfa, l);
					tfc.i = (*swfp_t[inst.float_detail
					    .opclass])(tfa.i, tfb.i);
				}
				ldt(inst.float_detail.fc, &tfc, l);
			}
			break;
		case op_src_qq:
			/* NOTE(review): Q/Q source only raises the inexact
			 * flag here, with no result written — confirm this
			 * matches the intended AARM behavior. */
			float_raise(FP_X_IMP);
			break;
		}
	}
}
574
/*
 * Software-complete the single FP instruction at user address
 * trigger_pc: fetch it, interpret it under the instruction's rounding
 * mode, fold any newly-raised exception flags back into the FPCR, and
 * return the signal to post (0 for none).  On SIGFPE, *ucode is set to
 * the newly-raised, unmasked flag bits.  Returns SIGSEGV if the
 * instruction cannot be read from user space.
 */
static int
alpha_fp_complete_at(alpha_instruction *trigger_pc, struct lwp *l,
    uint64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	uint64_t rm, fpcr, orig_fpcr;
	uint64_t orig_flags, new_flags, changed_flags, md_flags;

	if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	fpu_load();
	alpha_pal_wrfen(1);
	/*
	 * If necessary, lie about the dynamic rounding mode so emulation
	 * software need go to only one place for it, and so we don't have to
	 * lock any memory locations or pass a third parameter to every
	 * SoftFloat entry point.
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
		fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
		alpha_write_fpcr(fpcr);
	}
	/* flag state before interpretation, for the delta below */
	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);

	alpha_fp_interpret(trigger_pc, l, inst.bits);

	md_flags = l->l_md.md_flags;

	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	/* rebuild the FPCR from the updated FP_C (restores rounding mode) */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	/* signal only for flags that are both newly raised and unmasked */
	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}
620
/*
 * Main entry for an FP software-completion trap.  a0 appears to be the
 * PALcode trap summary (bit 0 set => software completion requested)
 * and a1 the mask of destination registers written by trapping
 * instructions — TODO confirm against the PALcode entry documentation.
 * Returns the signal to post (0 for none); *ucode is set for SIGFPE.
 */
int
alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
{
	int t;
	int sig;
	uint64_t op_class;
	alpha_instruction inst;
	/* "trigger_pc" is Compaq's term for the earliest faulting op */
	alpha_instruction *trigger_pc, *usertrap_pc;
	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];

	sig = SIGFPE;
	pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
	trigger_pc = pc - 1;	/* for ALPHA_AMASK_PAT case */
	if (cpu_amask & ALPHA_AMASK_PAT) {
		/* precise arithmetic traps: no trap shadow scan needed */
		if (a0 & 1 || alpha_fp_sync_complete) {
			sig = alpha_fp_complete_at(trigger_pc, l, ucode);
			goto done;
		}
	}
	*ucode = a0;
	if (!(a0 & 1))
		return sig;	/* software completion not requested */
/*
 * At this point we are somewhere in the trap shadow of one or more instruc-
 * tions that have trapped with software completion specified.  We have a mask
 * of the registers written by trapping instructions.
 *
 * Now step backwards through the trap shadow, clearing bits in the
 * destination write mask until the trigger instruction is found, and
 * interpret this one instruction in SW. If a SIGFPE is not required, back up
 * the PC until just after this instruction and restart. This will execute all
 * trap shadow instructions between the trigger pc and the trap pc twice.
 *
 * If a SIGFPE is generated from the OSF1 emulation,  back up one more
 * instruction to the trigger pc itself. Native binaries don't because it
 * is non-portable and completely defeats the intended purpose of IEEE
 * traps -- for example, to count the number of exponent wraps for a later
 * correction.
 */
	trigger_pc = 0;
	win_begin = pc;
	++alpha_shadow.scans;
	t = alpha_shadow.len;
	/* walk backwards until every bit of the write mask is accounted for */
	for (--pc; a1; --pc) {
		++alpha_shadow.len;
		if (pc < win_begin) {
			/* slide the copyin window back by TSWINSIZE words */
			win_begin = pc - TSWINSIZE + 1;
			if (copyin(win_begin, tsw, sizeof tsw)) {
				/* sigh, try to get just one */
				win_begin = pc;
				if (copyin(win_begin, tsw, 4))
					return SIGSEGV;
			}
		}
		assert(win_begin <= pc && !((long)pc  & 3));
		inst = tsw[pc - win_begin];
		op_class = 1UL << inst.generic_format.opcode;
		if (op_class & FPUREG_CLASS) {
			/* FP destination registers occupy mask bits 32-63 */
			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
			trigger_pc = pc;
		} else if (op_class & CPUREG_CLASS) {
			a1 &= ~(1UL << inst.operate_generic_format.rc);
			trigger_pc = pc;
		} else if (op_class & TRAPSHADOWBOUNDARY) {
			if (op_class & CHECKFUNCTIONCODE) {
				if (inst.mem_format.displacement == op_trapb ||
				    inst.mem_format.displacement == op_excb)
					break;	/* code breaks AARM rules */
			} else
				break; /* code breaks AARM rules */
		}
		/* Some shadow-safe op, probably load, store, or FPTI class */
	}
	/* record the longest scan seen, for the instrumentation above */
	t = alpha_shadow.len - t;
	if (t > alpha_shadow.max)
		alpha_shadow.max = t;
	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
		++alpha_shadow.resolved;
		sig = alpha_fp_complete_at(trigger_pc, l, ucode);
	} else {
		++alpha_shadow.unresolved;
		return sig;
	}
done:
	if (sig) {
		/* restart just past the trigger (see block comment above) */
		usertrap_pc = trigger_pc + 1;
#ifdef COMPAT_OSF1
		if (l->l_proc->p_emul == &emul_osf1)
			usertrap_pc = trigger_pc;
#endif
		l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
		return sig;
	}
	return 0;
}
717
718/*
719 * Load the float-point context for the current lwp.
720 */
721void
722fpu_state_load(struct lwp *l, bool used)
723{
724	struct pcb * const pcb = lwp_getpcb(l);
725
726	/*
727	 * Instrument FP usage -- if a process had not previously
728	 * used FP, mark it as having used FP for the first time,
729	 * and count this event.
730	 *
731	 * If a process has used FP, count a "used FP, and took
732	 * a trap to use it again" event.
733	 */
734	if (!fpu_used_p(l)) {
735		atomic_inc_ulong(&fpevent_use.ev_count);
736		fpu_mark_used(l);
737	} else
738		atomic_inc_ulong(&fpevent_reuse.ev_count);
739
740	alpha_pal_wrfen(1);
741	restorefpstate(&pcb->pcb_fp);
742	alpha_pal_wrfen(0);
743
744	l->l_md.md_flags |= MDLWP_FPACTIVE;
745}
746
747/*
748 * Save the FPU state.
749 */
750
/*
 * Save the FPU state into the lwp's PCB.  FP access is enabled only
 * for the duration of the save.
 */
void
fpu_state_save(struct lwp *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	alpha_pal_wrfen(1);
	savefpstate(&pcb->pcb_fp);
	alpha_pal_wrfen(0);
}
760
761/*
762 * Release the FPU.
763 */
/*
 * Release the FPU: clear the lwp's FP-active flag (state itself is
 * left to fpu_state_save/fpu_state_load).
 */
void
fpu_state_release(struct lwp *l)
{
	l->l_md.md_flags &= ~MDLWP_FPACTIVE;
}
769