1/*	$OpenBSD: fp_complete.c,v 1.13 2023/01/31 15:18:51 deraadt Exp $	*/
2/*	$NetBSD: fp_complete.c,v 1.5 2002/01/18 22:15:56 ross Exp $	*/
3
4/*-
5 * Copyright (c) 2001 Ross Harvey
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the NetBSD
19 *	Foundation, Inc. and its contributors.
20 * 4. Neither the name of The NetBSD Foundation nor the names of its
21 *    contributors may be used to endorse or promote products derived
22 *    from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/proc.h>
40
41#include <machine/cpu.h>
42#include <machine/fpu.h>
43#include <machine/reg.h>
44#include <alpha/alpha/db_instruction.h>
45
46#include <lib/libkern/softfloat.h>
47
#define	TSWINSIZE 4	/* size of trap shadow window in u_int32_t units */

/*	Set Name		Opcodes			AARM C.* Symbols  */

#define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM]	  */
#define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC		  */
#define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL		  */\
				 1UL << 0x19 |		/* \PAL\	  */\
				 1UL << 0x1a |		/* JSR		  */\
				 1UL << 0x1b |		/* \PAL\	  */\
				 1UL << 0x1d |		/* \PAL\	  */\
				 1UL << 0x1e |		/* \PAL\	  */\
				 1UL << 0x1f |		/* \PAL\	  */\
				 0xffffUL << 0x30 | 	/* branch ops	  */\
				 CHECKFUNCTIONCODE)

/*
 * Assemble an IEEE float bit pattern of the given width from its
 * sign, exponent, fraction-MSB, and remaining-fraction fields.
 */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
	(u_int ## width ## _t)(sign) << ((width) - 1)			|\
	(u_int ## width ## _t)(exp)  << ((width) - 1 - (expwidth))	|\
	(u_int ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)	|\
	(u_int ## width ## _t)(rest_of_frac)

#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)

#define IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/* Map a denormal input operand to zero if the process set IEEE_MAP_DMZ. */
#define	PREFILTER_SUBNORMAL(p,v) \
do { \
	if ((p)->p_md.md_flags & IEEE_MAP_DMZ && IS_SUBNORMAL(v)) \
		(v)->frac = 0; \
} while (0)

/* Map a denormal result to zero if the process set IEEE_MAP_UMZ. */
#define	POSTFILTER_SUBNORMAL(p,v) \
do { \
	if ((p)->p_md.md_flags & IEEE_MAP_UMZ && IS_SUBNORMAL(v)) \
		(v)->frac = 0; \
} while (0)

	/* Alpha returns 2.0 for true, all zeroes for false. */

#define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)

	/* Move bits from sw fp_c to hw fpcr. */

#define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
95
96/*
97 * Temporary trap shadow instrumentation. The [un]resolved counters
98 * could be kept permanently, as they provide information on whether
99 * user code has met AARM trap shadow generation requirements.
100 */
101
struct alpha_shadow {
	u_int64_t resolved;	/* cases trigger pc found */
	u_int64_t unresolved;	/* cases it wasn't, code problems? */
	u_int64_t scans;		/* trap shadow scans */
	u_int64_t len;		/* number of instructions examined */
	u_int64_t uop;		/* bit mask of unexpected opcodes */
	u_int64_t sqrts;	/* ev6+ square root single count */
	u_int64_t sqrtt;	/* ev6+ square root double count */
	u_int32_t ufunc;	/* bit mask of unexpected functions */
	u_int32_t max;		/* max trap shadow scan */
	u_int32_t nilswop;	/* unexpected op codes */
	u_int32_t nilswfunc;	/* unexpected function codes */
	u_int32_t nilanyop;	/* this "cannot happen" */
	u_int32_t vax;		/* sigs from vax fp opcodes */
} alpha_shadow, alpha_shadow_zero;	/* _zero stays all-zero: reset source */
117
static float64 float64_unk(float64, float64);
static float64 compare_un(float64, float64);
static float64 compare_eq(float64, float64);
static float64 compare_lt(float64, float64);
static float64 compare_le(float64, float64);
static void cvt_qs_ts_st_gf_qf(u_int32_t, struct proc *);
static void cvt_gd(u_int32_t, struct proc *);
static void cvt_qt_dg_qg(u_int32_t, struct proc *);
static void cvt_tq_gq(u_int32_t, struct proc *);

/*
 * SoftFloat dispatch tables, indexed by the instruction's opclass
 * field: single-precision arithmetic; double-precision arithmetic,
 * compares and reserved slots; and the cvt group (opclass 12..15,
 * see alpha_fp_interpret which subtracts 12 before indexing).
 */
static float32 (*swfp_s[])(float32, float32) = {
	float32_add, float32_sub, float32_mul, float32_div,
};

static float64 (*swfp_t[])(float64, float64) = {
	float64_add, float64_sub, float64_mul, float64_div,
	compare_un,    compare_eq,    compare_lt,    compare_le,
	float64_unk, float64_unk, float64_unk, float64_unk
};

static void (*swfp_cvt[])(u_int32_t, struct proc *) = {
	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
};
141
142static void
143this_cannot_happen(int what_cannot_happen, int64_t bits)
144{
145	static int total;
146	alpha_instruction inst;
147	static u_int64_t reported;
148
149	inst.bits = bits;
150	++alpha_shadow.nilswfunc;
151	if (bits != -1)
152		alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
153	if (1UL << what_cannot_happen & reported)
154		return;
155	reported |= 1UL << what_cannot_happen;
156	if (total >= 1000)
157		return;	/* right now, this return "cannot happen" */
158	++total;
159	if (bits)
160		printf("FP instruction %x\n", (unsigned int)bits);
161	printf("FP event %d/%lx/%lx\n", what_cannot_happen,
162	    (unsigned long)reported, (unsigned long)alpha_shadow.uop);
163}
164
/*
 * Read FP register rn into *v as an S-format float, then apply the
 * process's denormal-input map-to-zero filter (IEEE_MAP_DMZ).
 */
static __inline void
sts(unsigned int rn, s_float *v, struct proc *p)
{
	alpha_sts(rn, v);
	PREFILTER_SUBNORMAL(p, v);
}
171
/*
 * Read FP register rn into *v as a T-format float, then apply the
 * process's denormal-input map-to-zero filter (IEEE_MAP_DMZ).
 */
static __inline void
stt(unsigned int rn, t_float *v, struct proc *p)
{
	alpha_stt(rn, v);
	PREFILTER_SUBNORMAL(p, v);
}
178
/*
 * Apply the underflow map-to-zero filter (IEEE_MAP_UMZ) to the
 * S-format result *v, then write it to FP register rn.
 */
static __inline void
lds(unsigned int rn, s_float *v, struct proc *p)
{
	POSTFILTER_SUBNORMAL(p, v);
	alpha_lds(rn, v);
}
185
/*
 * Apply the underflow map-to-zero filter (IEEE_MAP_UMZ) to the
 * T-format result *v, then write it to FP register rn.
 */
static __inline void
ldt(unsigned int rn, t_float *v, struct proc *p)
{
	POSTFILTER_SUBNORMAL(p, v);
	alpha_ldt(rn, v);
}
192
193static float64
194compare_lt(float64 a, float64 b)
195{
196	return CMP_RESULT(float64_lt(a, b));
197}
198
199static float64
200compare_le(float64 a, float64 b)
201{
202	return CMP_RESULT(float64_le(a, b));
203}
204
205static float64
206compare_un(float64 a, float64 b)
207{
208	if (float64_is_nan(a) | float64_is_nan(b)) {
209		if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
210			float_set_invalid();
211		return CMP_RESULT(1);
212	}
213	return CMP_RESULT(0);
214}
215
216static float64
217compare_eq(float64 a, float64 b)
218{
219	return CMP_RESULT(float64_eq(a, b));
220}
221/*
222 * A note regarding the VAX FP ops.
223 *
224 * The AARM gives us complete leeway to set or not set status flags on VAX
225 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
226 * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
227 * For the purely vax ones, it's hard to imagine ever running them.
228 * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
229 * to panic, assert, or print unlimited output based on a path through the
230 * decoder, so weird cases don't become security issues.
231 */
/*
 * Emulate the cvt instructions sharing opclass 12: cvtts, cvtst and
 * the VAX slots (cvtqs/cvtgf/cvtqf share the encoding; the src field
 * and modifier bits distinguish them).
 */
static void
cvt_qs_ts_st_gf_qf(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	s_float sfb, sfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	/*
	 * cvtst and cvtts have the same opcode, function, and source.  The
	 * distinction for cvtst is hidden in the illegal modifier combinations.
	 * We decode even the non-/s modifier, so that the fix-up-always mode
	 * works on ev6 and later. The rounding bits are unused and fixed for
	 * cvtst, so we check those too.
	 */
	switch(inst.float_format.function) {
	case op_cvtst:
	case op_cvtst_u:
		/* cvtst: widen the S-format source to T-format. */
		sts(inst.float_detail.fb, &sfb, p);
		tfc.i = float32_to_float64(sfb.i);
		ldt(inst.float_detail.fc, &tfc, p);
		return;
	}
	if(inst.float_detail.src == 2) {
		/* src == T: cvtts, narrow T-format to S-format. */
		stt(inst.float_detail.fb, &tfb, p);
		sfc.i = float64_to_float32(tfb.i);
		lds(inst.float_detail.fc, &sfc, p);
		return;
	}
	/* 0: S/F */
	/* 1:  /D */
	/* 3: Q/Q */
	/*
	 * Remaining source types are not emulated; deliver a quiet NaN
	 * rather than trap.  NOTE(review): the event argument here is
	 * the opcode, not inst.bits as at the other call sites — verify
	 * that is intentional.
	 */
	this_cannot_happen(5, inst.generic_format.opcode);
	tfc.i = FLOAT64QNAN;
	ldt(inst.float_detail.fc, &tfc, p);
	return;
}
269
/*
 * Emulate cvtgd (opclass 13).  VAX G and IEEE T share enough bit
 * layout that the T-format softfloat routines are reused.
 */
static void
cvt_gd(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, p);
	/*
	 * The float32 conversion result is discarded: it is done only
	 * for its exception side effects — presumably a range check,
	 * since VAX D shares F's 8-bit exponent (TODO confirm).  The
	 * inexact flag it may set is cleared below, and adding +0
	 * re-rounds the operand to produce the stored result.
	 */
	(void) float64_to_float32(tfb.i);
	p->p_md.md_flags &= ~OPENBSD_FLAG_TO_FP_C(FP_X_IMP);
	tfc.i = float64_add(tfb.i, (float64)0);
	ldt(inst.float_detail.fc, &tfc, p);
}
283
/*
 * Emulate the cvt group with opclass 14: cvtqt plus the VAX cvtdg and
 * cvtqg slots.  Only the Q source (cvtqt) does real work; the rest
 * just produce zero without trapping.
 */
static void
cvt_qt_dg_qg(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	switch(inst.float_detail.src) {	/* 2-bit field: all values covered */
	case 0:	/* S/F */
		this_cannot_happen(3, inst.bits);
		/* fall thru */
	case 1: /* D */
		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
		/* We've done what's important by just not trapping */
		tfc.i = 0;
		break;
	case 2: /* T/G */
		this_cannot_happen(4, inst.bits);
		tfc.i = 0;
		break;
	case 3:	/* Q/Q */
		/* cvtqt: signed 64-bit integer to T floating. */
		stt(inst.float_detail.fb, &tfb, p);
		tfc.i = int64_to_float64(tfb.i);
		break;
	}
	/*
	 * Direct store, bypassing POSTFILTER_SUBNORMAL — presumably
	 * intentional, as a Q->T conversion cannot produce a denormal
	 * (TODO confirm).
	 */
	alpha_ldt(inst.float_detail.fc, &tfc);
}
311/*
312 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
313 *      unfortunate habit of always returning the nontrapping result.
314 * XXX: there are several apparent AARM/AAH disagreements, as well as
315 *      the issue of trap handler pc and trapping results.
316 * XXX: this function will work for signed and unsigned 64-bit integers.
317 *      rounding will happen per IEEE 754.  invalid exception will be
318 *      raised if argument is infinity, not-a-number or if it
319 *      overflows/underflows.  zero will be returned, in this case.
320 */
/*
 * Emulate cvttq/cvtgq (opclass 15): T/G floating to 64-bit integer.
 * See the XXX block above: on infinity, NaN, or overflow the invalid
 * exception is raised and zero is stored.
 */
static void
cvt_tq_gq(u_int32_t inst_bits, struct proc *p)
{
	t_float tfb, tfc;
	alpha_instruction inst;

	inst.bits = inst_bits;
	stt(inst.float_detail.fb, &tfb, p);
	tfc.i = float64_to_int64_no_overflow(tfb.i);
	/* Integer result: raw register store, no denormal filtering. */
	alpha_ldt(inst.float_detail.fc, &tfc);	/* yes, ldt */
}
332
/*
 * Compute a new FPCR value from the software FP_C (the architected
 * flag/mask word kept in md_flags).  Only the dynamic rounding mode
 * is preserved from the incoming fpcr; all exception-disable, mapping
 * and summary bits are derived from fp_c.
 */
static u_int64_t
fp_c_to_fpcr_1(u_int64_t fpcr, u_int64_t fp_c)
{
	u_int64_t disables;

	/*
	 * It's hard to arrange for conforming bit fields, because the FP_C
	 * and the FPCR are both architected, with specified (and relatively
	 * scrambled) bit numbers. Defining an internal unscrambled FP_C
	 * wouldn't help much, because every user exception requires the
	 * architected bit order in the sigcontext.
	 *
	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
	 * will lose, because those bits can be and usually are subsetted;
	 * the official home is in the fp_c. Furthermore, the kernel puts
	 * phony enables (it lies :-) in the fpcr in order to get control when
	 * it is necessary to initially set a sticky bit.
	 */

	fpcr &= FPCR_DYN(3);

	/*
	 * enable traps = case where flag bit is clear OR program wants a trap
	 * enables = ~flags | mask
	 * disables = ~(~flags | mask)
	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
	 */
	disables = FP_C_TO_OPENBSD_FLAG(fp_c) & ~FP_C_TO_OPENBSD_MASK(fp_c);

	/* Scatter the disable bits to their architected FPCR positions. */
	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);

#	if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&		\
	    FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&	\
	    FP_X_UFL << (61 - 3) == FPCR_UNFD &&			\
	    FP_X_IMP << (61 - 3) == FPCR_INED &&			\
	    FP_X_OFL << (49 - 0) == FPCR_OVFD)
#		error "Assertion failed"
	/*
	 * We don't care about the other built-in bit numbers because they
	 * have been architecturally specified.
	 */
#	endif

	/*
	 * NOTE(review): << binds tighter than &, so this masks fp_c
	 * with the already-shifted FP_C_MIRRORED — verify against the
	 * intended `(fp_c & FP_C_MIRRORED) << ...` reading.
	 */
	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
	if (fp_c & FP_C_MIRRORED)
		fpcr |= FPCR_SUM;	/* any mirrored flag sets the summary */
	if (fp_c & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;
	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
	return fpcr;
}
386
387static void
388fp_c_to_fpcr(struct proc *p)
389{
390	alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), p->p_md.md_flags));
391}
392
/*
 * Install a new software FP_C for the process, and propagate it to
 * the hardware FPCR.  Bits outside MDP_FP_C are ignored; a no-change
 * store skips the FPCR update entirely.  The wrfen(1)/wrfen(0) pair
 * brackets the FPCR access, which requires the FPU to be enabled.
 */
void
alpha_write_fp_c(struct proc *p, u_int64_t fp_c)
{
	u_int64_t md_flags;

	fp_c &= MDP_FP_C;
	md_flags = p->p_md.md_flags;
	if ((md_flags & MDP_FP_C) == fp_c)
		return;
	p->p_md.md_flags = (md_flags & ~MDP_FP_C) | fp_c;
	alpha_enable_fp(p, 1);
	alpha_pal_wrfen(1);
	fp_c_to_fpcr(p);
	alpha_pal_wrfen(0);
}
408
409u_int64_t
410alpha_read_fp_c(struct proc *p)
411{
412	/*
413	 * A possibly desirable EV6-specific optimization would deviate from
414	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
415	 * but in a transparent way. Some of the code for that would need to
416	 * go right here.
417	 */
418	return p->p_md.md_flags & MDP_FP_C;
419}
420
421static float64
422float64_unk(float64 a, float64 b)
423{
424	return 0;
425}
426
427/*
428 * The real function field encodings for IEEE and VAX FP instructions.
429 *
430 * Since there is only one operand type field, the cvtXX instructions
431 * require a variety of special cases, and these have to be analyzed as
432 * they don't always fit into the field descriptions in AARM section I.
433 *
434 * Lots of staring at bits in the appendix shows what's really going on.
435 *
436 *	   |	       |
437 * 15 14 13|12 11 10 09|08 07 06 05
438 * --------======------============
439 *  TRAP   : RND : SRC : FUNCTION  :
440 *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
441 *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
442 *	   |				 /V overfloat enable (if int output)
443 *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
444 *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
445 *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
446 *  1  0  1|. . .:. . . . . . . . . . . ./SU
447 *	   |				 /SV
448 *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
449 *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)	(IEEE only)
450 *	   |				 /SVI (if int output)   (IEEE only)
451 *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
452 *	   |  not all combinations are valid.
453 *	   |	       |
454 * 15 14 13|12 11 10 09|08 07 06 05
455 * --------======------============
456 *  TRAP   : RND : SRC : FUNCTION  :
457 *	   | 0	0 . . . . . . . . . . . ./C Chopped
458 *	   : 0	1 . . . . . . . . . . . ./M Minus Infinity
459 *	   | 1	0 . . . . . . . . . . . .   Normal
460 *	   : 1	1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
461 *	   |	       |
462 * 15 14 13|12 11 10 09|08 07 06 05
463 * --------======------============
464 *  TRAP   : RND : SRC : FUNCTION  :
465 *		   0 0. . . . . . . . . . S/F
466 *		   0 1. . . . . . . . . . -/D
467 *		   1 0. . . . . . . . . . T/G
468 *		   1 1. . . . . . . . . . Q/Q
469 *	   |	       |
470 * 15 14 13|12 11 10 09|08 07 06 05
471 * --------======------============
472 *  TRAP   : RND : SRC : FUNCTION  :
473 *			 0  0  0  0 . . . addX
474 *			 0  0  0  1 . . . subX
475 *			 0  0  1  0 . . . mulX
476 *			 0  0  1  1 . . . divX
477 *			 0  1  0  0 . . . cmpXun
478 *			 0  1  0  1 . . . cmpXeq
479 *			 0  1  1  0 . . . cmpXlt
480 *			 0  1  1  1 . . . cmpXle
481 *			 1  0  0  0 . . . reserved
482 *			 1  0  0  1 . . . reserved
483 *			 1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
484 *			 1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
485 *			 1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
486 *			 1  1  0  1 . . . cvtXd   (vax only)
487 *			 1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
488 *			 1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
489 *	   |	       |
490 * 15 14 13|12 11 10 09|08 07 06 05	  the twilight zone
491 * --------======------============
492 *  TRAP   : RND : SRC : FUNCTION  :
493 * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
494 *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
495 *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
496 *  x  0  x  x  x  x  0	 1  1  1  1 . . . cvttq/_ (src == T)
497 */
498
/*
 * Software-complete a single FP instruction: decode it, run the
 * operation through SoftFloat, reading operands from and writing the
 * result to the hardware FP register file.  Exception flags are
 * accumulated in the SoftFloat/FP_C state by the callees, not here.
 */
static void
alpha_fp_interpret(struct proc *p, u_int64_t bits)
{
	s_float sfa, sfb, sfc;
	t_float tfa, tfb, tfc;
	alpha_instruction inst;

	inst.bits = bits;
	switch(inst.generic_format.opcode) {
	default:
		/* this "cannot happen" */
		this_cannot_happen(2, inst.bits);
		return;
	case op_any_float:
		if (inst.float_format.function == op_cvtql_sv ||
		    inst.float_format.function == op_cvtql_v) {
			/*
			 * cvtql with overflow checking trapped: store a
			 * longword saturated by the source's sign and
			 * raise invalid.
			 */
			alpha_stt(inst.float_detail.fb, &tfb);
			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
			alpha_lds(inst.float_detail.fc, &sfc);
			float_raise(FP_X_INV);
		} else {
			++alpha_shadow.nilanyop;
			this_cannot_happen(3, inst.bits);
		}
		break;
	case op_vax_float:
		++alpha_shadow.vax;	/* fall thru */
	case op_ieee_float:
	case op_fix_float:
		switch(inst.float_detail.src) {
		case op_src_sf:
			sts(inst.float_detail.fb, &sfb, p);
			if (inst.float_detail.opclass == 10)
				sfc.i = float32_sqrt(sfb.i);
			else if (inst.float_detail.opclass & ~3) {
				/* only opclass 0..3 (add/sub/mul/div) valid */
				this_cannot_happen(1, inst.bits);
				sfc.i = FLOAT32QNAN;
			} else {
				sts(inst.float_detail.fa, &sfa, p);
				sfc.i = (*swfp_s[inst.float_detail.opclass])(
				    sfa.i, sfb.i);
			}
			lds(inst.float_detail.fc, &sfc, p);
			break;
		case op_src_xd:
		case op_src_tg:
			/* cvt group dispatches on opclass 12..15 */
			if (inst.float_detail.opclass >= 12)
				(*swfp_cvt[inst.float_detail.opclass - 12])(
				    inst.bits, p);
			else {
				stt(inst.float_detail.fb, &tfb, p);
				if (inst.float_detail.opclass == 10)
					tfc.i = float64_sqrt(tfb.i);
				else {
					stt(inst.float_detail.fa, &tfa, p);
					tfc.i = (*swfp_t[inst.float_detail
					    .opclass])(tfa.i, tfb.i);
				}
				ldt(inst.float_detail.fc, &tfc, p);
			}
			break;
		case op_src_qq:
			/* Q source: only the inexact flag is raised here. */
			float_raise(FP_X_IMP);
			break;
		}
	}
}
566
/*
 * Software-complete the FP instruction at trigger_pc.  Returns 0 on
 * success, SIGSEGV if the instruction cannot be read from user space,
 * or SIGFPE with *ucode set to the newly-raised flags for which the
 * process has traps enabled.
 */
int
alpha_fp_complete_at(u_long trigger_pc, struct proc *p, u_int64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	u_int64_t rm, fpcr, orig_fpcr;
	u_int64_t orig_flags, new_flags, changed_flags, md_flags;

	if (__predict_false(copyinsn(NULL, (u_int32_t *)trigger_pc,
	    (u_int32_t *)&inst))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	alpha_enable_fp(p, 1);
	alpha_pal_wrfen(1);
	/*
	 * If necessary, lie about the dynamic rounding mode so emulation
	 * software need go to only one place for it, and so we don't have to
	 * lock any memory locations or pass a third parameter to every
	 * SoftFloat entry point.
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
		fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
		alpha_write_fpcr(fpcr);
	}
	orig_flags = FP_C_TO_OPENBSD_FLAG(p->p_md.md_flags);

	alpha_fp_interpret(p, inst.bits);

	md_flags = p->p_md.md_flags;

	/*
	 * Flags are sticky, so only newly-set bits matter; a signal is
	 * due only for changed flags whose trap enable is set.
	 */
	new_flags = FP_C_TO_OPENBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	needsig = changed_flags & FP_C_TO_OPENBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}
612
/*
 * Handle an FP software-completion trap.  a0 is the exception summary
 * (bit 0 = software completion requested); a1 is the mask of registers
 * written by trapping instructions (FP registers in bits 32..63).
 * Returns 0 to retry/resume, or the signal (SIGFPE/SIGSEGV) to post.
 */
int
alpha_fp_complete(u_long a0, u_long a1, struct proc *p, u_int64_t *ucode)
{
	int t;
	int sig;
	u_int64_t op_class;
	alpha_instruction inst;
	/* "trigger_pc" is Compaq's term for the earliest faulting op */
	u_long trigger_pc, usertrap_pc;
	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];

	sig = SIGFPE;
	pc = (alpha_instruction *)p->p_md.md_tf->tf_regs[FRAME_PC];
	trigger_pc = (u_long)pc - 4;	/* for ALPHA_AMASK_PAT case */
	if (cpu_amask & ALPHA_AMASK_PAT) {
		/* Precise arithmetic traps: no trap shadow scan needed. */
		if (a0 & 1 || alpha_fp_sync_complete) {
			sig = alpha_fp_complete_at(trigger_pc, p, ucode);
			goto done;
		}
	}
	*ucode = a0;
	if (!(a0 & 1))
		return sig;	/* no software completion requested */
/*
 * At this point we are somewhere in the trap shadow of one or more instruc-
 * tions that have trapped with software completion specified.  We have a mask
 * of the registers written by trapping instructions.
 *
 * Now step backwards through the trap shadow, clearing bits in the
 * destination write mask until the trigger instruction is found, and
 * interpret this one instruction in SW. If a SIGFPE is not required, back up
 * the PC until just after this instruction and restart. This will execute all
 * trap shadow instructions between the trigger pc and the trap pc twice.
 */
	trigger_pc = 0;
	win_begin = pc;
	++alpha_shadow.scans;
	t = alpha_shadow.len;
	for (--pc; a1; --pc) {
		++alpha_shadow.len;
		if (pc < win_begin) {
			/* Refill the copyin window, TSWINSIZE ops at a time. */
			win_begin = pc - TSWINSIZE + 1;
			if (_copyin(win_begin, tsw, sizeof tsw)) {
				/* sigh, try to get just one */
				win_begin = pc;
				if (_copyin(win_begin, tsw, 4))
					return SIGSEGV;
			}
		}
		inst = tsw[pc - win_begin];
		op_class = 1UL << inst.generic_format.opcode;
		if (op_class & FPUREG_CLASS) {
			/* FP destination: its mask bit lives at rc + 32. */
			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
			trigger_pc = (u_long)pc;
		} else if (op_class & CPUREG_CLASS) {
			a1 &= ~(1UL << inst.operate_generic_format.rc);
			trigger_pc = (u_long)pc;
		} else if (op_class & TRAPSHADOWBOUNDARY) {
			if (op_class & CHECKFUNCTIONCODE) {
				if (inst.mem_format.displacement == op_trapb ||
				    inst.mem_format.displacement == op_excb)
					break;	/* code breaks AARM rules */
			} else
				break; /* code breaks AARM rules */
		}
		/* Some shadow-safe op, probably load, store, or FPTI class */
	}
	/* Keep the high-water mark of shadow scan lengths. */
	t = alpha_shadow.len - t;
	if (t > alpha_shadow.max)
		alpha_shadow.max = t;
	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
		++alpha_shadow.resolved;
		sig = alpha_fp_complete_at(trigger_pc, p, ucode);
	} else {
		++alpha_shadow.unresolved;
		return sig;
	}
done:
	if (sig) {
		/* Deliver the signal with the PC just past the trigger. */
		usertrap_pc = trigger_pc + 4;
		p->p_md.md_tf->tf_regs[FRAME_PC] = usertrap_pc;
		return sig;
	}
	return 0;
}
698