1/*
2 * fp_util.S
3 *
4 * Copyright Roman Zippel, 1997.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, and the entire permission notice in its entirety,
11 *    including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 *    products derived from this software without specific prior
17 *    written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions.  (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <linux/config.h>
39#include "fp_emu.h"
40
41/*
42 * Here are lots of conversion and normalization functions mainly
43 * used by fp_scan.S
44 * Note that these functions are optimized for "normal" numbers,
45 * these are handled first and exit as fast as possible, this is
46 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
47 * it's called very often.
48 * The register usage is optimized for fp_scan.S and for whichever
49 * registers are unused there at the time of the call, so be careful
50 * if you want to change something here. %d0 and %d1 are always usable,
51 * sometimes %d2 (or only its lower half); most functions have to return
52 * %a0 unmodified, so that the caller can immediately reuse it.
53 */
54
55	.globl	fp_ill, fp_end
56
57	| exits from fp_scan:
58	| illegal instruction
/*
 * fp_ill: exit path for an illegal instruction.
 * Emits the "fp_illegal" message through the printf macro (defined
 * outside this file) and returns to the caller.
 */
59fp_ill:
60	printf	,"fp_illegal\n"
61	rts
62	| completed instruction
/*
 * fp_end: exit path for a completely emulated instruction.
 *
 * Sanity check: the longwords at TASK_MM-8, TASK_MM-4 and TASK_MM
 * relative to %a2 are tested, and if any of them is negative (top bit
 * set) an "oops" line showing all three values is printed.
 *
 * Returns: %d0 = 0.
 * NOTE(review): %a2 presumably points at the current task structure and
 * TASK_MM is an offset into it -- confirm against fp_emu.h / the callers.
 */
63fp_end:
64	tst.l	(TASK_MM-8,%a2)
65	jmi	1f
66	tst.l	(TASK_MM-4,%a2)
67	jmi	1f
68	tst.l	(TASK_MM,%a2)
69	jpl	2f
701:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
712:	clr.l	%d0
72	rts
73
74	.globl	fp_conv_long2ext, fp_conv_single2ext
75	.globl	fp_conv_double2ext, fp_conv_ext2ext
76	.globl	fp_normalize_ext, fp_normalize_double
77	.globl	fp_normalize_single, fp_normalize_single_fast
78	.globl	fp_conv_ext2double, fp_conv_ext2single
79	.globl	fp_conv_ext2long, fp_conv_ext2short
80	.globl	fp_conv_ext2byte
81	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
82	.globl	fp_finalrounding_double
83	.globl	fp_finalrounding, fp_finaltest, fp_final
84
85/*
86 * First several conversion functions from a source operand
87 * into the extended format. Note, that only fp_conv_ext2ext
88 * normalizes the number and is always called after the other
89 * conversion functions, which only move the information into
90 * fp_ext structure.
91 */
92
93	| fp_conv_long2ext:
94	|
95	| args:	%d0 = source (32-bit long)
96	|	%a0 = destination (ptr to struct fp_ext)
97
/*
 * fp_conv_long2ext: store a signed 32-bit integer into a struct fp_ext
 * without normalizing it.
 *
 * In:   %d0 = source integer, %a0 = destination
 * Out:  0(%a0) = sign (upper word: 0 or 1) and exponent (lower word)
 *       4(%a0) = magnitude of the source (high mantissa longword)
 *       8(%a0) = 0 (low mantissa longword)
 *       %a0 is returned unchanged.
 * Uses: %d0, %d1
 *
 * The exponent is fixed at 0x3fff+31, i.e. the 32-bit magnitude is
 * treated as an integer; the mantissa is stored as is, so its top bit
 * is not necessarily set.
 */
98fp_conv_long2ext:
99	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
100	clr.l	%d1			| sign defaults to zero
101	tst.l	%d0
102	jeq	fp_l2e_zero		| is source zero?
103	jpl	1f			| positive?
104	moveq	#1,%d1
105	neg.l	%d0
/* move the sign into the upper word, then fill in the exponent below it */
1061:	swap	%d1
107	move.w	#0x3fff+31,%d1
108	move.l	%d1,(%a0)+		| set sign / exp
109	move.l	%d0,(%a0)+		| set mantissa
110	clr.l	(%a0)
111	subq.l	#8,%a0			| restore %a0
112	printx	PCONV,%a0@
113	printf	PCONV,")\n"
114	rts
115	| source is zero
/* zero is stored as three zero longwords (sign, exponent, mantissa all 0) */
116fp_l2e_zero:
117	clr.l	(%a0)+
118	clr.l	(%a0)+
119	clr.l	(%a0)
120	subq.l	#8,%a0
121	printx	PCONV,%a0@
122	printf	PCONV,")\n"
123	rts
124
125	| fp_conv_single2ext
126	| args:	%d0 = source (single-precision fp value)
127	|	%a0 = dest (struct fp_ext *)
128
/*
 * fp_conv_single2ext: unpack an IEEE single-precision bit pattern into
 * a struct fp_ext.
 *
 * In:   %d0 = single-precision value, %a0 = destination
 * Out:  0(%a0) = sign (upper word) / exponent (lower word)
 *       4(%a0) = mantissa, left aligned; 8(%a0) = 0
 *       %a0 is returned unchanged.
 * Uses: %d0, %d1
 *
 * %d0 << 8 puts the 23 fraction bits into bits 30..8; bit 31 then holds
 * the exponent lsb and is set or cleared explicitly afterwards.
 * %d1 >> 15 followed by the word-sized shift by 8 leaves the sign in
 * bit 16 and the 8-bit exponent in the low word.  The Z flag of that
 * last word shift is what the following jeq tests.
 */
129fp_conv_single2ext:
130	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
131	move.l	%d0,%d1
132	lsl.l	#8,%d0			| shift mantissa
133	lsr.l	#8,%d1			| exponent / sign
134	lsr.l	#7,%d1
135	lsr.w	#8,%d1
136	jeq	fp_s2e_small		| zero / denormal?
137	cmp.w	#0xff,%d1		| NaN / Inf?
138	jeq	fp_s2e_large
139	bset	#31,%d0			| set explizit bit
140	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
/* common store: %d1 = sign/exponent longword, %d0 = high mantissa */
1419:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
142	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
143	clr.l	(%a0)			| low lword = 0
144	subq.l	#8,%a0
145	printx	PCONV,%a0@
146	printf	PCONV,")\n"
147	rts
148	| zeros and denormalized
/*
 * A zero fraction is a true zero and keeps exponent 0.  A denormal gets
 * the exponent 0x4000-0x7f and keeps its mantissa without the explicit
 * bit, i.e. it is stored unnormalized.
 */
149fp_s2e_small:
150	| exponent is zero, so explizit bit is already zero too
151	tst.l	%d0
152	jeq	9b
153	move.w	#0x4000-0x7f,%d1
154	jra	9b
155	| infinities and NAN
/* exponent becomes 0x7fff; bit 31 (the exponent lsb) is dropped */
156fp_s2e_large:
157	bclr	#31,%d0			| clear explizit bit
158	move.w	#0x7fff,%d1
159	jra	9b
160
/*
 * fp_conv_double2ext: fetch an IEEE double through the getuser macro
 * and unpack it into a struct fp_ext.
 *
 * In:   %a1 = source address (read with getuser.l, post-incremented by
 *             8 in total), %a0 = destination
 * Out:  0(%a0) = sign (upper word) / exponent (lower word)
 *       4(%a0), 8(%a0) = mantissa, left aligned
 *       %a0 is returned unchanged.
 * Uses: %d0, %d1
 *
 * NOTE(review): getuser and the fp_err_ua2 argument come from outside
 * this file; presumably fp_err_ua2 is the handler for a faulting user
 * access -- confirm in fp_emu.h.
 *
 * High longword: shifting left by 11 puts the 20 high fraction bits
 * into bits 30..11 (bit 31 holds the exponent lsb until it is set or
 * cleared explicitly).  %d1 >> 15 followed by the word shift by 5
 * leaves the sign in bit 16 and the 11-bit exponent in the low word.
 */
161fp_conv_double2ext:
162#ifdef FPU_EMU_DEBUG
163	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
164	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
165	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
166#endif
167	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
168	move.l	%d0,%d1
169	lsl.l	#8,%d0			| shift high mantissa
170	lsl.l	#3,%d0
171	lsr.l	#8,%d1			| exponent / sign
172	lsr.l	#7,%d1
173	lsr.w	#5,%d1
174	jeq	fp_d2e_small		| zero / denormal?
175	cmp.w	#0x7ff,%d1		| NaN / Inf?
176	jeq	fp_d2e_large
177	bset	#31,%d0			| set explizit bit
178	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
/*
 * Common store.  The second source longword is split: its low 21 bits
 * (shifted left by 11) become the low mantissa longword, its top 11
 * bits are OR-ed into the bottom of the high mantissa longword.
 */
1799:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
180	move.l	%d0,(%a0)+
181	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
182	move.l	%d0,%d1
183	lsl.l	#8,%d0
184	lsl.l	#3,%d0
185	move.l	%d0,(%a0)
186	moveq	#21,%d0
187	lsr.l	%d0,%d1
188	or.l	%d1,-(%a0)
189	subq.l	#4,%a0
190	printx	PCONV,%a0@
191	printf	PCONV,")\n"
192	rts
193	| zeros and denormalized
/*
 * NOTE(review): only the high fraction bits held in %d0 are tested
 * here.  A denormal whose set bits all lie in the second (low) source
 * longword therefore keeps exponent 0 instead of 0x4000-0x3ff --
 * verify whether that is intended.
 */
194fp_d2e_small:
195	| exponent is zero, so explizit bit is already zero too
196	tst.l	%d0
197	jeq	9b
198	move.w	#0x4000-0x3ff,%d1
199	jra	9b
200	| infinities and NAN
/* exponent becomes 0x7fff; bit 31 (the exponent lsb) is dropped */
201fp_d2e_large:
202	bclr	#31,%d0			| clear explizit bit
203	move.w	#0x7fff,%d1
204	jra	9b
205
206	| fp_conv_ext2ext:
207	| originally used to get longdouble from userspace, now it's
208	| called before arithmetic operations to make sure the number
209	| is normalized [maybe rename it?].
210	| args:	%a0 = dest (struct fp_ext *)
211	| returns 0 in %d0 for a NaN, otherwise 1
212
/*
 * fp_conv_ext2ext: normalize the struct fp_ext at %a0 in place.
 *
 * In:   %a0 = operand
 * Out:  %d0 = 1 on the non-rounding exit below; the NaN case is handled
 *       by fp_e2e_large.  %a0 is restored before every return.
 * Uses: %d0, %d1, %d2
 *
 * Layout as this code uses it: byte 0 is the extra-precision byte
 * (CONFIG_M68KFPU_EMU_EXTRAPREC only), byte 1 the sign, the word at
 * offset 2 the exponent, and the longwords at offsets 4 and 8 the high
 * and low mantissa.
 */
213fp_conv_ext2ext:
214	printf	PCONV,"e2e: %p(",1,%a0
215	printx	PCONV,%a0@
216	printf	PCONV,"), "
217	move.l	(%a0)+,%d0
218	cmp.w	#0x7fff,%d0		| Inf / NaN?
219	jeq	fp_e2e_large
220	move.l	(%a0),%d0
221	jpl	fp_e2e_small		| zero / denorm?
222	| The high bit is set, so normalization is irrelevant.
/*
 * Common exit of the normalization paths; entered with %a0 pointing at
 * the high mantissa longword (offset 4).
 */
223fp_e2e_checkround:
224	subq.l	#4,%a0
225#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
226	move.b	(%a0),%d0
227	jne	fp_e2e_round
228#endif
229	printf	PCONV,"%p(",1,%a0
230	printx	PCONV,%a0@
231	printf	PCONV,")\n"
232	moveq	#1,%d0
233	rts
234#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/*
 * Round away a non-zero extra-precision byte (held in %d0.b; bit 7 is
 * the guard bit).  INEX2 is flagged and the byte is cleared in memory.
 * The rounding mode word is read from FPD_RND in FPDATA:
 * 0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity.
 *
 * Round to nearest: guard clear -> truncate; guard set and mantissa
 * lsb odd -> round up; guard set, lsb even -> round up only if any
 * lower bit of the extra byte is set (ties go to even).
 *
 * NOTE(review): the rounding exit returns without loading %d0, unlike
 * the moveq #1 exit above -- confirm callers do not depend on %d0 here.
 */
235fp_e2e_round:
236	fp_set_sr FPSR_EXC_INEX2
237	clr.b	(%a0)
238	move.w	(FPD_RND,FPDATA),%d2
239	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
240	tst.b	%d0			| test guard bit
241	jpl	9f			| zero is closer
242	btst	#0,(11,%a0)		| test lsb bit
243	jne	fp_e2e_doroundup	| round to infinity
244	lsl.b	#1,%d0			| check low bits
245	jeq	9f			| round to zero
/*
 * Add one to the 64-bit mantissa.  A carry out of the high longword
 * leaves the mantissa at zero, so its top word is set to 0x8000 and the
 * exponent is incremented.
 */
246fp_e2e_doroundup:
247	addq.l	#1,(8,%a0)
248	jcc	9f
249	addq.l	#1,(4,%a0)
250	jcc	9f
251	move.w	#0x8000,(4,%a0)
252	addq.w	#1,(2,%a0)
2539:	printf	PNORM,"%p(",1,%a0
254	printx	PNORM,%a0@
255	printf	PNORM,")\n"
256	rts
/* directed rounding: the sign byte at offset 1 decides up or truncate */
257fp_e2e_roundother:
258	subq.w	#2,%d2
259	jcs	9b			| %d2 < 2, round to zero
260	jhi	1f			| %d2 > 2, round to +infinity
261	tst.b	(1,%a0)			| to -inf
262	jne	fp_e2e_doroundup	| negative, round to infinity
263	jra	9b			| positive, round to zero
2641:	tst.b	(1,%a0)			| to +inf
265	jeq	fp_e2e_doroundup	| positive, round to infinity
266	jra	9b			| negative, round to zero
267#endif
268	| zeros and subnormals:
269	| try to normalize these anyway.
/*
 * fp_e2e_small: the top mantissa bit is clear.
 *
 * Entered from fp_conv_ext2ext with %a0 pointing at the high mantissa
 * longword (offset 4), %d0 = that longword, and the condition codes of
 * its load still valid -- the first jne below tests them.
 *
 * Dispatch: high longword non-zero -> fp_e2e_small1; only the low
 * longword non-zero -> fp_e2e_small2; with EXTRAPREC, only the extra
 * byte non-zero -> fp_e2e_small3; otherwise the value is zero.
 */
270fp_e2e_small:
271	jne	fp_e2e_small1		| high lword zero?
272	move.l	(4,%a0),%d0
273	jne	fp_e2e_small2
274#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
275	clr.l	%d0
276	move.b	(-4,%a0),%d0
277	jne	fp_e2e_small3
278#endif
279	| Genuine zero.
/* a zero mantissa gets exponent 0; the sign is left alone; %d0 = 1 */
280	clr.w	-(%a0)
281	subq.l	#2,%a0
282	printf	PNORM,"%p(",1,%a0
283	printx	PNORM,%a0@
284	printf	PNORM,")\n"
285	moveq	#1,%d0
286	rts
287	| definitely subnormal, need to shift all 64 bits
/*
 * fp_e2e_small1: %d1 = number of leading zero bits in the high
 * longword, i.e. the wanted left shift.  The exponent is reduced by the
 * same amount.  If that would borrow, the shift is limited to the old
 * exponent value and the exponent becomes 0.
 */
288fp_e2e_small1:
289	bfffo	%d0{#0,#32},%d1
290	move.w	-(%a0),%d2
291	sub.w	%d1,%d2
292	jcc	1f
293	| Pathologically small, denormalize.
294	add.w	%d2,%d1
295	clr.w	%d2
2961:	move.w	%d2,(%a0)+
297	move.w	%d1,%d2
298	jeq	fp_e2e_checkround
299	| fancy 64-bit double-shift begins here
/*
 * Shift both longwords left by %d2; the bits leaving the low longword
 * are recovered with a right shift by (32 - shift) and OR-ed into the
 * high one.  Afterwards %d2 = 32 - shift and %a0 is back at offset 4.
 */
300	lsl.l	%d2,%d0
301	move.l	%d0,(%a0)+
302	move.l	(%a0),%d0
303	move.l	%d0,%d1
304	lsl.l	%d2,%d0
305	move.l	%d0,(%a0)
306	neg.w	%d2
307	and.w	#0x1f,%d2
308	lsr.l	%d2,%d1
309	or.l	%d1,-(%a0)
310#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/*
 * fp_e2e_extra1: feed the extra-precision byte into the bits vacated at
 * the bottom of the low longword.  Expects %d2 = 32 - shift.
 * The add of 24 yields shift - 8 and carries when shift >= 8: then the
 * whole byte moves into the low longword and the extra byte is cleared.
 * Otherwise the byte is shifted by `shift`, its low 8 bits stay in the
 * extra byte and the bits above go into the low longword.
 */
311fp_e2e_extra1:
312	clr.l	%d0
313	move.b	(-4,%a0),%d0
314	neg.w	%d2
315	add.w	#24,%d2
316	jcc	1f
317	clr.b	(-4,%a0)
318	lsl.l	%d2,%d0
319	or.l	%d0,(4,%a0)
320	jra	fp_e2e_checkround
3211:	addq.w	#8,%d2
322	lsl.l	%d2,%d0
323	move.b	%d0,(-4,%a0)
324	lsr.l	#8,%d0
325	or.l	%d0,(4,%a0)
326#endif
327	jra	fp_e2e_checkround
328	| pathologically small subnormal
/*
 * fp_e2e_small2: the high mantissa longword is zero, the low one is not.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4)
 * and %d0 = low mantissa longword.
 *
 * %d1 = 32 + number of leading zero bits in the low longword, i.e. the
 * wanted left shift (32..63).  The exponent is reduced by that amount.
 * If that would borrow, the shift is limited to the old exponent value
 * and the exponent becomes 0.
 *
 * Fix: the "is the shift at least 32?" test has to be made on the shift
 * count in %d1, exactly as fp_ne_small2 does.  It used to subtract 32
 * from %d2, which holds the new exponent at that point; the following
 * lsl.l then ran with a count of 32..63 (clearing the mantissa), or the
 * bfins ran with a meaningless offset.
 */
329fp_e2e_small2:
330	bfffo	%d0{#0,#32},%d1
331	add.w	#32,%d1
332	move.w	-(%a0),%d2
333	sub.w	%d1,%d2
334	jcc	1f
335	| Beyond pathologically small, denormalize.
336	add.w	%d2,%d1
337	clr.w	%d2
3381:	move.w	%d2,(%a0)+
339	ext.l	%d1
340	jeq	fp_e2e_checkround
341	clr.l	(4,%a0)
342	sub.w	#32,%d1			| shift >= 32? (%d1 = shift - 32)
343	jcs	1f
344	lsl.l	%d1,%d0			| lower lword needs only to be shifted
345	move.l	%d0,(%a0)		| into the higher lword
346#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/* place the extra byte directly below the shifted longword */
347	clr.l	%d0
348	move.b	(-4,%a0),%d0
349	clr.b	(-4,%a0)
350	neg.w	%d1
351	add.w	#32,%d1
352	bfins	%d0,(%a0){%d1,#8}
353#endif
354	jra	fp_e2e_checkround
/* shift < 32: %d1 becomes 32 - shift, the bit offset for the insert */
3551:	neg.w	%d1			| lower lword is split between
356	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
357#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
358	jra	fp_e2e_checkround
359#else
360	move.w	%d1,%d2
361	jra	fp_e2e_extra1
362	| These are extremely small numbers, that will mostly end up as zero
363	| anyway, so this is only important for correct rounding.
/*
 * fp_e2e_small3: only the extra-precision byte (in %d0, zero-extended)
 * is non-zero.  %d1 = 40 + number of leading zero bits of that byte is
 * the wanted shift, limited by the exponent as above.
 */
364fp_e2e_small3:
365	bfffo	%d0{#24,#8},%d1
366	add.w	#40,%d1
367	move.w	-(%a0),%d2
368	sub.w	%d1,%d2
369	jcc	1f
370	| Pathologically small, denormalize.
371	add.w	%d2,%d1
372	clr.w	%d2
3731:	move.w	%d2,(%a0)+
374	ext.l	%d1
375	jeq	fp_e2e_checkround
376	cmp.w	#8,%d1
377	jcs	2f
3781:	clr.b	(-4,%a0)
379	sub.w	#64,%d1
380	jcs	1f
381	add.w	#24,%d1
382	lsl.l	%d1,%d0
383	move.l	%d0,(%a0)
384	jra	fp_e2e_checkround
3851:	neg.w	%d1
386	bfins	%d0,(%a0){%d1,#8}
387	jra	fp_e2e_checkround
3882:	lsl.l	%d1,%d0
389	move.b	%d0,(-4,%a0)
390	lsr.l	#8,%d0
391	move.b	%d0,(7,%a0)
392	jra	fp_e2e_checkround
393#endif
/*
 * NOTE(review): nothing branches to the local label below, and the
 * code before it always ends in an unconditional jra, so this block is
 * unreachable.  It looks like a leftover of an older implementation
 * that kept the shift count in %d2.
 */
3941:	move.l	%d0,%d1			| lower lword is split between
395	lsl.l	%d2,%d0			| higher and lower lword
396	move.l	%d0,(%a0)
397	move.l	%d1,%d0
398	neg.w	%d2
399	add.w	#32,%d2
400	lsr.l	%d2,%d0
401	move.l	%d0,-(%a0)
402	jra	fp_e2e_checkround
403	| Infinities and NaNs
/*
 * fp_e2e_large: exponent is 0x7fff (infinity or NaN).
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * The mantissa is examined with its top bit ignored: if all remaining
 * bits of both longwords are zero the value is an infinity and %d0 = 1
 * is returned, otherwise it is a NaN and %d0 = 0 is returned.
 * The operand is not modified; %a0 is restored before returning.
 */
404fp_e2e_large:
405	move.l	(%a0)+,%d0
406	jne	3f
4071:	tst.l	(%a0)
408	jne	4f
409	moveq	#1,%d0
4102:	subq.l	#8,%a0
411	printf	PCONV,"%p(",1,%a0
412	printx	PCONV,%a0@
413	printf	PCONV,")\n"
414	rts
415	| we have maybe a NaN, shift off the highest bit
4163:	lsl.l	#1,%d0
417	jeq	1b
418	| we have a NaN, clear the return value
4194:	clrl	%d0
420	jra	2b
421
422
423/*
424 * Normalization functions.  Call these on the output of general
425 * FP operators, and before any conversion into the destination
426 * formats. fp_normalize_ext has always to be called first, the
427 * following conversion functions expect an already normalized
428 * number.
429 */
430
431	| fp_normalize_ext:
432	| normalize an extended in extended (unpacked) format, basically
433	| it does the same as fp_conv_ext2ext, additionally it also does
434	| the necessary postprocessing checks.
435	| args:	%a0 (struct fp_ext *)
436	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
437
/*
 * fp_normalize_ext: normalize the struct fp_ext at %a0 in place and
 * flag the resulting exceptions through fp_set_sr.
 *
 * In:   %a0 = operand
 * Out:  operand normalized; %a0 is restored before every return.
 *       No return value is set up in %d0.
 * Uses: %d0, %d1 and -- as far as the instructions visible here go --
 *       only the low word of %d2.
 *
 * Layout as this code uses it: byte 0 is the extra-precision byte
 * (CONFIG_M68KFPU_EMU_EXTRAPREC only), byte 1 the sign, the word at
 * offset 2 the exponent, and the longwords at offsets 4 and 8 the high
 * and low mantissa.
 */
438fp_normalize_ext:
439	printf	PNORM,"ne: %p(",1,%a0
440	printx	PNORM,%a0@
441	printf	PNORM,"), "
442	move.l	(%a0)+,%d0
443	cmp.w	#0x7fff,%d0		| Inf / NaN?
444	jeq	fp_ne_large
445	move.l	(%a0),%d0
446	jpl	fp_ne_small		| zero / denorm?
447	| The high bit is set, so normalization is irrelevant.
/*
 * Common exit of the normalization paths; entered with %a0 pointing at
 * the high mantissa longword (offset 4).
 */
448fp_ne_checkround:
449	subq.l	#4,%a0
450#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
451	move.b	(%a0),%d0
452	jne	fp_ne_round
453#endif
454	printf	PNORM,"%p(",1,%a0
455	printx	PNORM,%a0@
456	printf	PNORM,")\n"
457	rts
458#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/*
 * Round away a non-zero extra-precision byte (held in %d0.b; bit 7 is
 * the guard bit).  INEX2 is flagged and the byte is cleared in memory.
 * The rounding mode word is read from FPD_RND in FPDATA:
 * 0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity.
 *
 * Round to nearest: guard clear -> truncate; guard set and mantissa
 * lsb odd -> round up; guard set, lsb even -> round up only if any
 * lower bit of the extra byte is set (ties go to even).
 */
459fp_ne_round:
460	fp_set_sr FPSR_EXC_INEX2
461	clr.b	(%a0)
462	move.w	(FPD_RND,FPDATA),%d2
463	jne	fp_ne_roundother	| %d2 == 0, round to nearest
464	tst.b	%d0			| test guard bit
465	jpl	9f			| zero is closer
466	btst	#0,(11,%a0)		| test lsb bit
467	jne	fp_ne_doroundup		| round to infinity
468	lsl.b	#1,%d0			| check low bits
469	jeq	9f			| round to zero
/*
 * Add one to the 64-bit mantissa.  A carry out of the high longword
 * leaves the mantissa at zero, so the exponent is incremented and the
 * top mantissa word is set to 0x8000.
 */
470fp_ne_doroundup:
471	addq.l	#1,(8,%a0)
472	jcc	9f
473	addq.l	#1,(4,%a0)
474	jcc	9f
475	addq.w	#1,(2,%a0)
476	move.w	#0x8000,(4,%a0)
4779:	printf	PNORM,"%p(",1,%a0
478	printx	PNORM,%a0@
479	printf	PNORM,")\n"
480	rts
/* directed rounding: the sign byte at offset 1 decides up or truncate */
481fp_ne_roundother:
482	subq.w	#2,%d2
483	jcs	9b			| %d2 < 2, round to zero
484	jhi	1f			| %d2 > 2, round to +infinity
485	tst.b	(1,%a0)			| to -inf
486	jne	fp_ne_doroundup		| negative, round to infinity
487	jra	9b			| positive, round to zero
4881:	tst.b	(1,%a0)			| to +inf
489	jeq	fp_ne_doroundup		| positive, round to infinity
490	jra	9b			| negative, round to zero
491#endif
492	| Zeros and subnormal numbers
493	| These are probably merely subnormal, rather than "denormalized"
494	|  numbers, so we will try to make them normal again.
/*
 * fp_ne_small: the top mantissa bit is clear.
 *
 * Entered from fp_normalize_ext with %a0 pointing at the high mantissa
 * longword (offset 4), %d0 = that longword, and the condition codes of
 * its load still valid -- the first jne below tests them.
 *
 * Dispatch: high longword non-zero -> fp_ne_small1; only the low
 * longword non-zero -> fp_ne_small2; with EXTRAPREC, only the extra
 * byte non-zero -> fp_ne_small3; otherwise the value is zero.
 */
495fp_ne_small:
496	jne	fp_ne_small1		| high lword zero?
497	move.l	(4,%a0),%d0
498	jne	fp_ne_small2
499#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
500	clr.l	%d0
501	move.b	(-4,%a0),%d0
502	jne	fp_ne_small3
503#endif
504	| Genuine zero.
/* a zero mantissa gets exponent 0; the sign is left alone */
505	clr.w	-(%a0)
506	subq.l	#2,%a0
507	printf	PNORM,"%p(",1,%a0
508	printx	PNORM,%a0@
509	printf	PNORM,")\n"
510	rts
511	| Subnormal.
/*
 * fp_ne_small1: %d1 = number of leading zero bits in the high longword,
 * i.e. the wanted left shift.  The exponent is reduced by the same
 * amount.  If that would borrow, the shift is limited to the old
 * exponent value, the exponent becomes 0 and UNFL is flagged.
 */
512fp_ne_small1:
513	bfffo	%d0{#0,#32},%d1
514	move.w	-(%a0),%d2
515	sub.w	%d1,%d2
516	jcc	1f
517	| Pathologically small, denormalize.
518	add.w	%d2,%d1
519	clr.w	%d2
520	fp_set_sr FPSR_EXC_UNFL
5211:	move.w	%d2,(%a0)+
522	move.w	%d1,%d2
523	jeq	fp_ne_checkround
524	| This is exactly the same 64-bit double shift as seen above.
/*
 * Shift both longwords left by %d2; the bits leaving the low longword
 * are recovered with a right shift by (32 - shift) and OR-ed into the
 * high one.  Afterwards %d2 = 32 - shift and %a0 is back at offset 4.
 */
525	lsl.l	%d2,%d0
526	move.l	%d0,(%a0)+
527	move.l	(%a0),%d0
528	move.l	%d0,%d1
529	lsl.l	%d2,%d0
530	move.l	%d0,(%a0)
531	neg.w	%d2
532	and.w	#0x1f,%d2
533	lsr.l	%d2,%d1
534	or.l	%d1,-(%a0)
535#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/*
 * fp_ne_extra1: feed the extra-precision byte into the bits vacated at
 * the bottom of the low longword.  Expects %d2 = 32 - shift.
 * The add of 24 yields shift - 8 and carries when shift >= 8: then the
 * whole byte moves into the low longword and the extra byte is cleared.
 * Otherwise the byte is shifted by `shift`, its low 8 bits stay in the
 * extra byte and the bits above go into the low longword.
 */
536fp_ne_extra1:
537	clr.l	%d0
538	move.b	(-4,%a0),%d0
539	neg.w	%d2
540	add.w	#24,%d2
541	jcc	1f
542	clr.b	(-4,%a0)
543	lsl.l	%d2,%d0
544	or.l	%d0,(4,%a0)
545	jra	fp_ne_checkround
5461:	addq.w	#8,%d2
547	lsl.l	%d2,%d0
548	move.b	%d0,(-4,%a0)
549	lsr.l	#8,%d0
550	or.l	%d0,(4,%a0)
551#endif
552	jra	fp_ne_checkround
553	| May or may not be subnormal, if so, only 32 bits to shift.
/*
 * fp_ne_small2: the high mantissa longword is zero, the low one is not.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4)
 * and %d0 = low mantissa longword.
 *
 * %d1 = 32 + number of leading zero bits in the low longword, i.e. the
 * wanted left shift (32..63).  The exponent is reduced by that amount.
 * If that would borrow, the shift is limited to the old exponent value,
 * the exponent becomes 0 and UNFL is flagged.
 *
 * After the exponent is stored, 32 is subtracted from the shift count:
 * no borrow means the low longword simply moves into the high one with
 * a residual shift of 0..31; a borrow (shift < 32) means it straddles
 * both longwords and is placed with a bit-field insert at bit offset
 * 32 - shift.
 */
554fp_ne_small2:
555	bfffo	%d0{#0,#32},%d1
556	add.w	#32,%d1
557	move.w	-(%a0),%d2
558	sub.w	%d1,%d2
559	jcc	1f
560	| Beyond pathologically small, denormalize.
561	add.w	%d2,%d1
562	clr.w	%d2
563	fp_set_sr FPSR_EXC_UNFL
5641:	move.w	%d2,(%a0)+
565	ext.l	%d1
566	jeq	fp_ne_checkround
567	clr.l	(4,%a0)
568	sub.w	#32,%d1
569	jcs	1f
570	lsl.l	%d1,%d0			| lower lword needs only to be shifted
571	move.l	%d0,(%a0)		| into the higher lword
572#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
/* place the extra byte directly below the shifted longword */
573	clr.l	%d0
574	move.b	(-4,%a0),%d0
575	clr.b	(-4,%a0)
576	neg.w	%d1
577	add.w	#32,%d1
578	bfins	%d0,(%a0){%d1,#8}
579#endif
580	jra	fp_ne_checkround
5811:	neg.w	%d1			| lower lword is splitted between
582	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
583#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
584	jra	fp_ne_checkround
585#else
586	move.w	%d1,%d2
587	jra	fp_ne_extra1
588	| These are extremely small numbers, that will mostly end up as zero
589	| anyway, so this is only important for correct rounding.
/*
 * fp_ne_small3: only the extra-precision byte (in %d0, zero-extended)
 * is non-zero.  %d1 = 40 + number of leading zero bits of that byte is
 * the wanted shift, limited by the exponent as above.
 * NOTE(review): unlike fp_ne_small1 and fp_ne_small2, the denormalize
 * branch here does not flag UNFL -- confirm whether that is intended.
 */
590fp_ne_small3:
591	bfffo	%d0{#24,#8},%d1
592	add.w	#40,%d1
593	move.w	-(%a0),%d2
594	sub.w	%d1,%d2
595	jcc	1f
596	| Pathologically small, denormalize.
597	add.w	%d2,%d1
598	clr.w	%d2
5991:	move.w	%d2,(%a0)+
600	ext.l	%d1
601	jeq	fp_ne_checkround
602	cmp.w	#8,%d1
603	jcs	2f
6041:	clr.b	(-4,%a0)
605	sub.w	#64,%d1
606	jcs	1f
607	add.w	#24,%d1
608	lsl.l	%d1,%d0
609	move.l	%d0,(%a0)
610	jra	fp_ne_checkround
6111:	neg.w	%d1
612	bfins	%d0,(%a0){%d1,#8}
613	jra	fp_ne_checkround
6142:	lsl.l	%d1,%d0
615	move.b	%d0,(-4,%a0)
616	lsr.l	#8,%d0
617	move.b	%d0,(7,%a0)
618	jra	fp_ne_checkround
619#endif
620	| Infinities and NaNs, again, same as above.
/*
 * fp_ne_large: exponent is 0x7fff (infinity or NaN).
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4);
 * %a0 is restored before returning.
 *
 * - Infinity (all mantissa bits below the top one are zero): the high
 *   mantissa longword is cleared if only its top bit was set, so the
 *   result has an all-zero mantissa.
 * - NaN: bit 30 of the high longword is tested and set in %d0.  If it
 *   was already set, nothing is changed.  If it was clear, SNAN is
 *   flagged and the longword with bit 30 set is written back.
 */
621fp_ne_large:
622	move.l	(%a0)+,%d0
623	jne	3f
6241:	tst.l	(%a0)
625	jne	4f
6262:	subq.l	#8,%a0
627	printf	PNORM,"%p(",1,%a0
628	printx	PNORM,%a0@
629	printf	PNORM,")\n"
630	rts
631	| we have maybe a NaN, shift off the highest bit
6323:	move.l	%d0,%d1
633	lsl.l	#1,%d1
634	jne	4f
635	clr.l	(-4,%a0)
636	jra	1b
637	| we have a NaN, test if it is signaling
6384:	bset	#30,%d0
639	jne	2b
640	fp_set_sr FPSR_EXC_SNAN
641	move.l	%d0,(-4,%a0)
642	jra	2b
643
644	| these next two do rounding as per the IEEE standard.
645	| values for the rounding modes appear to be:
646	| 0:	Round to nearest
647	| 1:	Round to zero
648	| 2:	Round to -Infinity
649	| 3:	Round to +Infinity
650	| both functions expect that fp_normalize_ext was already
651	| called (and the extended argument is already normalized
652	| as far as possible); they are used if a different
653	| rounding precision is selected and before converting
654	| into single/double
655
656	| fp_normalize_double:
657	| normalize an extended with double (52-bit) precision
658	| args:	 %a0 (struct fp_ext *)
659
/*
 * fp_normalize_double: round the struct fp_ext at %a0 to double
 * precision, keeping it in the unpacked extended layout.
 *
 * In:   %a0 = operand
 * Out:  operand rounded in place; INEX2 / UNFL / OVFL are flagged
 *       through fp_set_sr where they apply; %a0 is restored before
 *       every return.
 * Uses: %d0, %d1, %d2.  The initial move.l leaves the sign in the high
 *       word of %d2, and the directed-rounding code below reads it
 *       from there.
 *
 * The exponent is checked against the double range first: below it
 * goes to fp_nd_small, at or above 0x7fe (after re-biasing) to
 * fp_nd_large.
 */
660fp_normalize_double:
661	printf	PNORM,"nd: %p(",1,%a0
662	printx	PNORM,%a0@
663	printf	PNORM,"), "
664	move.l	(%a0)+,%d2
665	tst.w	%d2
666	jeq	fp_nd_zero		| zero / denormalized
667	cmp.w	#0x7fff,%d2
668	jeq	fp_nd_huge		| NaN / infinitive.
669	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
670	jcs	fp_nd_small		| too small.
671	cmp.w	#0x7fe,%d2
672	jcc	fp_nd_large		| too big.
673	addq.l	#4,%a0
674	move.l	(%a0),%d0		| low lword of mantissa
675	| now, round off the low 11 bits.
/*
 * fp_nd_round: entered with %a0 pointing at the low mantissa longword
 * (offset 8) and %d0 = that longword, possibly with bit 0 set as a
 * sticky bit.  Shifting left by 21 keeps only the 11 bits to be
 * discarded (guard bit in bit 31) and leaves the last bit shifted out,
 * the lsb of the kept mantissa, in the X flag.
 */
676fp_nd_round:
677	moveq	#21,%d1
678	lsl.l	%d1,%d0			| keep 11 low bits.
679	jne	fp_nd_checkround	| Are they non-zero?
680	| nothing to do here
6819:	subq.l	#8,%a0
682	printf	PNORM,"%p(",1,%a0
683	printx	PNORM,%a0@
684	printf	PNORM,")\n"
685	rts
686	| Be careful with the X bit! It contains the lsb
687	| from the shift above, it is needed for round to nearest.
/*
 * Inexact result: flag INEX2, clear the 11 discarded bits in memory
 * and decide according to the rounding mode word read from FPD_RND in
 * FPDATA (0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity).
 */
688fp_nd_checkround:
689	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
690	and.w	#0xf800,(2,%a0)		| clear bits 0-10
691	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
692	jne	2f			| %d2 == 0, round to nearest
693	tst.l	%d0			| test guard bit
694	jpl	9b			| zero is closer
695	| here we test the X bit by adding it to %d2
696	clr.w	%d2			| first set z bit, addx only clears it
697	addx.w	%d2,%d2			| test lsb bit
698	| IEEE754-specified "round to even" behaviour.  If the guard
699	| bit is set, then the number is odd, so rounding works like
700	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
701	| Otherwise, an equal distance rounds towards zero, so as not
702	| to produce an odd number.  This is strange, but it is what
703	| the standard says.
704	jne	fp_nd_doroundup		| round to infinity
705	lsl.l	#1,%d0			| check low bits
706	jeq	9b			| round to zero
/*
 * Add one unit in the last kept place (0x800 in the low longword) and
 * propagate the carry into the high longword.  A carry out of the high
 * longword increments the exponent; if the exponent thereby reaches
 * 0x43ff the value is handed to fp_nd_large.
 */
707fp_nd_doroundup:
708	| round (the mantissa, that is) towards infinity
709	add.l	#0x800,(%a0)
710	jcc	9b			| no overflow, good.
711	addq.l	#1,-(%a0)		| extend to high lword
712	jcc	1f			| no overflow, good.
713	| Yow! we have managed to overflow the mantissa.  Since this
714	| only happens when %d1 was 0xfffff800, it is now zero, so
715	| reset the high bit, and increment the exponent.
716	move.w	#0x8000,(%a0)
717	addq.w	#1,-(%a0)
718	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
719	jeq	fp_nd_large		| yes, so make it infinity.
/* here %a0 points at the high mantissa longword (offset 4) */
7201:	subq.l	#4,%a0
721	printf	PNORM,"%p(",1,%a0
722	printx	PNORM,%a0@
723	printf	PNORM,")\n"
724	rts
/* directed rounding modes */
7252:	subq.w	#2,%d2
726	jcs	9b			| %d2 < 2, round to zero
727	jhi	3f			| %d2 > 2, round to +infinity
728	| Round to +Inf or -Inf.  High word of %d2 contains the
729	| sign of the number, by the way.
730	swap	%d2			| to -inf
731	tst.b	%d2
732	jne	fp_nd_doroundup		| negative, round to infinity
733	jra	9b			| positive, round to zero
7343:	swap	%d2			| to +inf
735	tst.b	%d2
736	jeq	fp_nd_doroundup		| positive, round to infinity
737	jra	9b			| negative, round to zero
738	| Exponent underflow.  Try to make a denormal, and set it to
739	| the smallest possible fraction if this fails.
/*
 * fp_nd_small: the exponent is below the double range.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4)
 * and %d2.w = (exponent - (0x4000-0x3ff)), which is negative here.
 * UNFL is flagged, the exponent is set to 0x3c01 and the mantissa is
 * shifted right by the amount of underflow (the negated %d2).  Any
 * non-zero bits shifted out are recorded as a sticky bit in bit 0 of
 * %d0.  Every path ends at fp_nd_round with %a0 at offset 8 and
 * %d0 = low mantissa longword.
 *
 * Three cases: shift < 32 (both longwords shift), 32 <= shift < 64
 * (the high longword moves into the low one), and shift >= 64 (the
 * mantissa is cleared and only the sticky bit is left for rounding).
 */
740fp_nd_small:
741	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
742	move.w	#0x3c01,(-2,%a0)	| 2**-1022
743	neg.w	%d2			| degree of underflow
744	cmp.w	#32,%d2			| single or double shift?
745	jcc	1f
746	| Again, another 64-bit double shift.
747	move.l	(%a0),%d0
748	move.l	%d0,%d1
749	lsr.l	%d2,%d0
750	move.l	%d0,(%a0)+
751	move.l	(%a0),%d0
752	lsr.l	%d2,%d0
753	neg.w	%d2
754	add.w	#32,%d2
755	lsl.l	%d2,%d1
756	or.l	%d1,%d0
757	move.l	(%a0),%d1
758	move.l	%d0,(%a0)
759	| Check to see if we shifted off any significant bits
760	lsl.l	%d2,%d1
761	jeq	fp_nd_round		| Nope, round.
762	bset	#0,%d0			| Yes, so set the "sticky bit".
763	jra	fp_nd_round		| Now, round.
764	| Another 64-bit single shift and store
7651:	sub.w	#32,%d2
766	cmp.w	#32,%d2			| Do we really need to shift?
767	jcc	2f			| No, the number is too small.
768	move.l	(%a0),%d0
769	clr.l	(%a0)+
770	move.l	%d0,%d1
771	lsr.l	%d2,%d0
772	neg.w	%d2
773	add.w	#32,%d2
774	| Again, check to see if we shifted off any significant bits.
/* the old low longword is dropped entirely, so any bit in it is sticky */
775	tst.l	(%a0)
776	jeq	1f
777	bset	#0,%d0			| Sticky bit.
7781:	move.l	%d0,(%a0)
779	lsl.l	%d2,%d1
780	jeq	fp_nd_round
781	bset	#0,%d0
782	jra	fp_nd_round
783	| Sorry, the number is just too small.
7842:	clr.l	(%a0)+
785	clr.l	(%a0)
786	moveq	#1,%d0			| Smallest possible fraction,
787	jra	fp_nd_round		| round as desired.
788	| zero and denormalized
/*
 * fp_nd_zero: the exponent word is zero.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * If both mantissa longwords are zero the operand is a zero and is
 * returned unchanged.  Otherwise UNFL is flagged, the mantissa is
 * cleared, the exponent is set to 0x3c01, and fp_nd_round is entered
 * with %a0 at offset 8 and %d0 = 1, so only a sticky bit is left for
 * the rounding decision.
 */
789fp_nd_zero:
790	tst.l	(%a0)+
791	jne	1f
792	tst.l	(%a0)
793	jne	1f
794	subq.l	#8,%a0
795	printf	PNORM,"%p(",1,%a0
796	printx	PNORM,%a0@
797	printf	PNORM,")\n"
798	rts				| zero.  nothing to do.
799	| These are not merely subnormal numbers, but true denormals,
800	| i.e. pathologically small (exponent is 2**-16383) numbers.
801	| It is clearly impossible for even a normal extended number
802	| with that exponent to fit into double precision, so just
803	| write these ones off as "too darn small".
/* both branches arrive here with %a0 at offset 8 */
8041:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
805	clr.l	(%a0)
806	clr.l	-(%a0)
807	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
808	addq.l	#6,%a0
809	moveq	#1,%d0
810	jra	fp_nd_round		| round.
811	| Exponent overflow.  Just call it infinity.
/*
 * fp_nd_large: the exponent is above the double range.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * INEX2 is flagged if any of the low 11 mantissa bits is set, and OVFL
 * is always flagged.  The result depends on the rounding mode word
 * read from FPD_RND in FPDATA and on the sign byte at offset 1:
 *   - round to nearest, or directed rounding away from zero for this
 *     sign: infinity (exponent 0x7fff, mantissa 0);
 *   - round to zero, or directed rounding towards zero for this sign:
 *     exponent 0x43fe with mantissa 0xffffffff 0xfffff800.
 * %a0 is restored before returning.
 */
812fp_nd_large:
813	move.w	#0x7ff,%d0
814	and.w	(6,%a0),%d0
815	jeq	1f
816	fp_set_sr FPSR_EXC_INEX2
8171:	fp_set_sr FPSR_EXC_OVFL
818	move.w	(FPD_RND,FPDATA),%d2
819	jne	3f			| %d2 = 0 round to nearest
8201:	move.w	#0x7fff,(-2,%a0)
821	clr.l	(%a0)+
822	clr.l	(%a0)
8232:	subq.l	#8,%a0
824	printf	PNORM,"%p(",1,%a0
825	printx	PNORM,%a0@
826	printf	PNORM,")\n"
827	rts
8283:	subq.w	#2,%d2
829	jcs	5f			| %d2 < 2, round to zero
830	jhi	4f			| %d2 > 2, round to +infinity
831	tst.b	(-3,%a0)		| to -inf
832	jne	1b
833	jra	5f
8344:	tst.b	(-3,%a0)		| to +inf
835	jeq	1b
/* moveq sets %d0 to all ones; the move.w then makes it 0xfffff800 */
8365:	move.w	#0x43fe,(-2,%a0)
837	moveq	#-1,%d0
838	move.l	%d0,(%a0)+
839	move.w	#0xf800,%d0
840	move.l	%d0,(%a0)
841	jra	2b
842	| Infinities or NaNs
/* exponent 0x7fff: returned unchanged, only %a0 is restored */
843fp_nd_huge:
844	subq.l	#4,%a0
845	printf	PNORM,"%p(",1,%a0
846	printx	PNORM,%a0@
847	printf	PNORM,")\n"
848	rts
849
850	| fp_normalize_single:
851	| normalize an extended with single (23-bit) precision
852	| args:	 %a0 (struct fp_ext *)
853
/*
 * fp_normalize_single: round the struct fp_ext at %a0 to single
 * precision, keeping it in the unpacked extended layout.
 *
 * In:   %a0 = operand
 * Out:  operand rounded in place (the low mantissa longword and the
 *       low byte of the high one end up zero); INEX2 / UNFL / OVFL are
 *       flagged through fp_set_sr where they apply; %a0 is restored
 *       before every return.
 * Uses: %d0, %d1, %d2
 *
 * The exponent is checked against the single range first: below it
 * goes to fp_ns_small, at or above 0xfe (after re-biasing) to
 * fp_ns_large.
 */
854fp_normalize_single:
855	printf	PNORM,"ns: %p(",1,%a0
856	printx	PNORM,%a0@
857	printf	PNORM,") "
858	addq.l	#2,%a0
859	move.w	(%a0)+,%d2
860	jeq	fp_ns_zero		| zero / denormalized
861	cmp.w	#0x7fff,%d2
862	jeq	fp_ns_huge		| NaN / infinitive.
863	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
864	jcs	fp_ns_small		| too small.
865	cmp.w	#0xfe,%d2
866	jcc	fp_ns_large		| too big.
867	move.l	(%a0)+,%d0		| get high lword of mantissa
/*
 * fp_ns_round: entered with %a0 pointing at the low mantissa longword
 * (offset 8) and %d0 = high mantissa longword, possibly with bit 0
 * already set as a sticky bit.
 */
868fp_ns_round:
869	tst.l	(%a0)			| check the low lword
870	jeq	1f
871	| Set a sticky bit if it is non-zero.  This should only
872	| affect the rounding in what would otherwise be equal-
873	| distance situations, which is what we want it to do.
874	bset	#0,%d0
8751:	clr.l	(%a0)			| zap it from memory.
876	| now, round off the low 8 bits of the hi lword.
877	tst.b	%d0			| 8 low bits.
878	jne	fp_ns_checkround	| Are they non-zero?
879	| nothing to do here
880	subq.l	#8,%a0
881	printf	PNORM,"%p(",1,%a0
882	printx	PNORM,%a0@
883	printf	PNORM,")\n"
884	rts
/*
 * Inexact result: flag INEX2 and clear the discarded byte in memory.
 * The clr.b / subq.l pair moves %a0 back to the high mantissa longword
 * (offset 4).  The rounding mode word is read from FPD_RND in FPDATA
 * (0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity).  In %d0,
 * bit 7 is the guard bit and bit 8 the lsb of the kept mantissa.
 */
885fp_ns_checkround:
886	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
887	clr.b	-(%a0)			| clear low byte of high lword
888	subq.l	#3,%a0
889	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
890	jne	2f			| %d2 == 0, round to nearest
891	tst.b	%d0			| test guard bit
892	jpl	9f			| zero is closer
893	btst	#8,%d0			| test lsb bit
894	| round to even behaviour, see above.
895	jne	fp_ns_doroundup		| round to infinity
896	lsl.b	#1,%d0			| check low bits
897	jeq	9f			| round to zero
/*
 * Add one unit in the last kept place (0x100 in the high longword).
 * A carry out increments the exponent; if the exponent thereby reaches
 * 0x407f the value is handed to fp_ns_large.
 */
898fp_ns_doroundup:
899	| round (the mantissa, that is) towards infinity
900	add.l	#0x100,(%a0)
901	jcc	9f			| no overflow, good.
902	| Overflow.  This means that the %d1 was 0xffffff00, so it
903	| is now zero.  We will set the mantissa to reflect this, and
904	| increment the exponent (checking for overflow there too)
905	move.w	#0x8000,(%a0)
906	addq.w	#1,-(%a0)
907	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
908	jeq	fp_ns_large		| yes, so make it infinity.
9099:	subq.l	#4,%a0
910	printf	PNORM,"%p(",1,%a0
911	printx	PNORM,%a0@
912	printf	PNORM,")\n"
913	rts
914	| check nondefault rounding modes
/* (-3,%a0) is the sign byte at offset 1, since %a0 is at offset 4 */
9152:	subq.w	#2,%d2
916	jcs	9b			| %d2 < 2, round to zero
917	jhi	3f			| %d2 > 2, round to +infinity
918	tst.b	(-3,%a0)		| to -inf
919	jne	fp_ns_doroundup		| negative, round to infinity
920	jra	9b			| positive, round to zero
9213:	tst.b	(-3,%a0)		| to +inf
922	jeq	fp_ns_doroundup		| positive, round to infinity
923	jra	9b			| negative, round to zero
924	| Exponent underflow.  Try to make a denormal, and set it to
925	| the smallest possible fraction if this fails.
/*
 * fp_ns_small: the exponent is below the single range.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4)
 * and %d2.w = (exponent - (0x4000-0x7f)), which is negative here.
 * UNFL is flagged, the exponent is set to 0x3f81 and the high mantissa
 * longword is shifted right by the amount of underflow (the negated
 * %d2).  Bits shifted out, and a non-zero low longword, are recorded
 * as a sticky bit in bit 0 of %d0.  For a shift of 32 or more the
 * mantissa is cleared and only the sticky bit is left.  Every path
 * ends at fp_ns_round with %a0 at offset 8 and %d0 = high mantissa
 * longword.
 *
 * NOTE(review): the size-less clr below carries no explicit operand
 * size, whereas fp_ns_round clears the same longword with clr.l right
 * afterwards -- confirm which size the assembler picks; the result
 * does not appear to depend on it.
 */
926fp_ns_small:
927	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
928	move.w	#0x3f81,(-2,%a0)	| 2**-126
929	neg.w	%d2			| degree of underflow
930	cmp.w	#32,%d2			| single or double shift?
931	jcc	2f
932	| a 32-bit shift.
933	move.l	(%a0),%d0
934	move.l	%d0,%d1
935	lsr.l	%d2,%d0
936	move.l	%d0,(%a0)+
937	| Check to see if we shifted off any significant bits.
938	neg.w	%d2
939	add.w	#32,%d2
940	lsl.l	%d2,%d1
941	jeq	1f
942	bset	#0,%d0			| Sticky bit.
943	| Check the lower lword
9441:	tst.l	(%a0)
945	jeq	fp_ns_round
946	clr	(%a0)
947	bset	#0,%d0			| Sticky bit.
948	jra	fp_ns_round
949	| Sorry, the number is just too small.
9502:	clr.l	(%a0)+
951	clr.l	(%a0)
952	moveq	#1,%d0			| Smallest possible fraction,
953	jra	fp_ns_round		| round as desired.
954	| Exponent overflow.  Just call it infinity.
/*
 * fp_ns_large: the exponent is above the single range.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * INEX2 is flagged if the low byte of the high mantissa longword is
 * non-zero, and OVFL is always flagged.  The result depends on the
 * rounding mode word read from FPD_RND in FPDATA and on the sign byte
 * at offset 1:
 *   - round to nearest, or directed rounding away from zero for this
 *     sign: infinity (exponent 0x7fff, mantissa 0);
 *   - round to zero, or directed rounding towards zero for this sign:
 *     exponent 0x407e with mantissa 0xffffff00 0x00000000.
 * %a0 is restored before returning.
 */
955fp_ns_large:
956	tst.b	(3,%a0)
957	jeq	1f
958	fp_set_sr FPSR_EXC_INEX2
9591:	fp_set_sr FPSR_EXC_OVFL
960	move.w	(FPD_RND,FPDATA),%d2
961	jne	3f			| %d2 = 0 round to nearest
9621:	move.w	#0x7fff,(-2,%a0)
963	clr.l	(%a0)+
964	clr.l	(%a0)
9652:	subq.l	#8,%a0
966	printf	PNORM,"%p(",1,%a0
967	printx	PNORM,%a0@
968	printf	PNORM,")\n"
969	rts
9703:	subq.w	#2,%d2
971	jcs	5f			| %d2 < 2, round to zero
972	jhi	4f			| %d2 > 2, round to +infinity
973	tst.b	(-3,%a0)		| to -inf
974	jne	1b
975	jra	5f
9764:	tst.b	(-3,%a0)		| to +inf
977	jeq	1b
9785:	move.w	#0x407e,(-2,%a0)
979	move.l	#0xffffff00,(%a0)+
980	clr.l	(%a0)
981	jra	2b
982	| zero and denormalized
/*
 * fp_ns_zero: the exponent word is zero.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * If both mantissa longwords are zero the operand is a zero and is
 * returned unchanged.  Otherwise UNFL is flagged, the mantissa is
 * cleared, the exponent is set to 0x3f81, and fp_ns_round is entered
 * with %a0 at offset 8 and %d0 = 1, so only a sticky bit is left for
 * the rounding decision.
 */
983fp_ns_zero:
984	tst.l	(%a0)+
985	jne	1f
986	tst.l	(%a0)
987	jne	1f
988	subq.l	#8,%a0
989	printf	PNORM,"%p(",1,%a0
990	printx	PNORM,%a0@
991	printf	PNORM,")\n"
992	rts				| zero.  nothing to do.
993	| These are not merely subnormal numbers, but true denormals,
994	| i.e. pathologically small (exponent is 2**-16383) numbers.
995	| It is clearly impossible for even a normal extended number
996	| with that exponent to fit into single precision, so just
997	| write these ones off as "too darn small".
/* both branches arrive here with %a0 at offset 8 */
9981:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
999	clr.l	(%a0)
1000	clr.l	-(%a0)
1001	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
1002	addq.l	#6,%a0
1003	moveq	#1,%d0
1004	jra	fp_ns_round		| round.
1005	| Infinities or NaNs
/* exponent 0x7fff: returned unchanged, only %a0 is restored */
1006fp_ns_huge:
1007	subq.l	#4,%a0
1008	printf	PNORM,"%p(",1,%a0
1009	printx	PNORM,%a0@
1010	printf	PNORM,")\n"
1011	rts
1012
1013	| fp_normalize_single_fast:
1014	| normalize an extended with single (23-bit) precision
1015	| this is only used by fsgldiv/fsglmul, where the
1016	| operand is not completely normalized.
1017	| args:	 %a0 (struct fp_ext *)
1018
/*
 * fp_normalize_single_fast: round the mantissa of the struct fp_ext at
 * %a0 to single precision.  Unlike fp_normalize_single, the exponent is
 * only checked for 0x7fff on entry; there is no zero or underflow
 * handling here.
 *
 * In:   %a0 = operand
 * Out:  mantissa rounded in place (the low mantissa longword and the
 *       low byte of the high one end up zero); INEX2 and, after an
 *       exponent overflow caused by rounding up, OVFL are flagged
 *       through fp_set_sr; %a0 is restored before every return.
 * Uses: %d0, %d2
 */
1019fp_normalize_single_fast:
1020	printf	PNORM,"nsf: %p(",1,%a0
1021	printx	PNORM,%a0@
1022	printf	PNORM,") "
1023	addq.l	#2,%a0
1024	move.w	(%a0)+,%d2
1025	cmp.w	#0x7fff,%d2
1026	jeq	fp_nsf_huge		| NaN / infinitive.
1027	move.l	(%a0)+,%d0		| get high lword of mantissa
/*
 * Here %a0 points at the low mantissa longword (offset 8) and
 * %d0 = high mantissa longword.
 */
1028fp_nsf_round:
1029	tst.l	(%a0)			| check the low lword
1030	jeq	1f
1031	| Set a sticky bit if it is non-zero.  This should only
1032	| affect the rounding in what would otherwise be equal-
1033	| distance situations, which is what we want it to do.
1034	bset	#0,%d0
10351:	clr.l	(%a0)			| zap it from memory.
1036	| now, round off the low 8 bits of the hi lword.
1037	tst.b	%d0			| 8 low bits.
1038	jne	fp_nsf_checkround	| Are they non-zero?
1039	| nothing to do here
1040	subq.l	#8,%a0
1041	printf	PNORM,"%p(",1,%a0
1042	printx	PNORM,%a0@
1043	printf	PNORM,")\n"
1044	rts
/*
 * Inexact result: flag INEX2 and clear the discarded byte in memory.
 * The clr.b / subq.l pair moves %a0 back to the high mantissa longword
 * (offset 4).  The rounding mode word is read from FPD_RND in FPDATA
 * (0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity).  In %d0,
 * bit 7 is the guard bit and bit 8 the lsb of the kept mantissa.
 */
1045fp_nsf_checkround:
1046	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1047	clr.b	-(%a0)			| clear low byte of high lword
1048	subq.l	#3,%a0
1049	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1050	jne	2f			| %d2 == 0, round to nearest
1051	tst.b	%d0			| test guard bit
1052	jpl	9f			| zero is closer
1053	btst	#8,%d0			| test lsb bit
1054	| round to even behaviour, see above.
1055	jne	fp_nsf_doroundup		| round to infinity
1056	lsl.b	#1,%d0			| check low bits
1057	jeq	9f			| round to zero
/*
 * Add one unit in the last kept place (0x100 in the high longword).
 * A carry out increments the exponent; if the exponent thereby reaches
 * 0x407f the value is handed to fp_nsf_large.
 */
1058fp_nsf_doroundup:
1059	| round (the mantissa, that is) towards infinity
1060	add.l	#0x100,(%a0)
1061	jcc	9f			| no overflow, good.
1062	| Overflow.  This means that the %d1 was 0xffffff00, so it
1063	| is now zero.  We will set the mantissa to reflect this, and
1064	| increment the exponent (checking for overflow there too)
1065	move.w	#0x8000,(%a0)
1066	addq.w	#1,-(%a0)
1067	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
1068	jeq	fp_nsf_large		| yes, so make it infinity.
10699:	subq.l	#4,%a0
1070	printf	PNORM,"%p(",1,%a0
1071	printx	PNORM,%a0@
1072	printf	PNORM,")\n"
1073	rts
1074	| check nondefault rounding modes
/* (-3,%a0) is the sign byte at offset 1, since %a0 is at offset 4 */
10752:	subq.w	#2,%d2
1076	jcs	9b			| %d2 < 2, round to zero
1077	jhi	3f			| %d2 > 2, round to +infinity
1078	tst.b	(-3,%a0)		| to -inf
1079	jne	fp_nsf_doroundup	| negative, round to infinity
1080	jra	9b			| positive, round to zero
10813:	tst.b	(-3,%a0)		| to +inf
1082	jeq	fp_nsf_doroundup		| positive, round to infinity
1083	jra	9b			| negative, round to zero
1084	| Exponent overflow.  Just call it infinity.
/*
 * fp_nsf_large: the exponent overflowed the single range after
 * rounding up in fp_normalize_single_fast.
 *
 * Entered with %a0 pointing at the high mantissa longword (offset 4).
 * INEX2 is flagged if the low byte of the high mantissa longword is
 * non-zero, and OVFL is always flagged.  The result depends on the
 * rounding mode word read from FPD_RND in FPDATA and on the sign byte
 * at offset 1:
 *   - round to nearest, or directed rounding away from zero for this
 *     sign: infinity (exponent 0x7fff, mantissa 0);
 *   - round to zero, or directed rounding towards zero for this sign:
 *     exponent 0x407e with mantissa 0xffffff00 0x00000000.
 * %a0 is restored before returning.
 */
1085fp_nsf_large:
1086	tst.b	(3,%a0)
1087	jeq	1f
1088	fp_set_sr FPSR_EXC_INEX2
10891:	fp_set_sr FPSR_EXC_OVFL
1090	move.w	(FPD_RND,FPDATA),%d2
1091	jne	3f			| %d2 = 0 round to nearest
10921:	move.w	#0x7fff,(-2,%a0)
1093	clr.l	(%a0)+
1094	clr.l	(%a0)
10952:	subq.l	#8,%a0
1096	printf	PNORM,"%p(",1,%a0
1097	printx	PNORM,%a0@
1098	printf	PNORM,")\n"
1099	rts
11003:	subq.w	#2,%d2
1101	jcs	5f			| %d2 < 2, round to zero
1102	jhi	4f			| %d2 > 2, round to +infinity
1103	tst.b	(-3,%a0)		| to -inf
1104	jne	1b
1105	jra	5f
11064:	tst.b	(-3,%a0)		| to +inf
1107	jeq	1b
11085:	move.w	#0x407e,(-2,%a0)
1109	move.l	#0xffffff00,(%a0)+
1110	clr.l	(%a0)
1111	jra	2b
1112	| Infinities or NaNs
/* exponent 0x7fff: returned unchanged, only %a0 is restored */
1113fp_nsf_huge:
1114	subq.l	#4,%a0
1115	printf	PNORM,"%p(",1,%a0
1116	printx	PNORM,%a0@
1117	printf	PNORM,")\n"
1118	rts
1119
1120	| conv_ext2int (macro):
1121	| Generates a subroutine that converts an extended value to an
1122	| integer of a given size, again, with the appropriate type of
1123	| rounding.
1124
1125	| Macro arguments:
1126	| s:	size, as given in an assembly instruction.
1127	| b:	number of bits in that size.
1128
1129	| Subroutine arguments:
1130	| %a0:	source (struct fp_ext *)
1131
1132	| Returns the integer in %d0 (like it should)
1133
1134.macro conv_ext2int s,b
	| Body of an extended -> integer conversion routine.
	| On entry %a0 points at the source value; the integer result is
	| left in %d0.  %d1 and %d2 are used as scratch and %a0 is moved
	| past the leading sign/exponent lword to the mantissa.
	| Status bits touched below: INEX2 when nonzero bits are discarded,
	| OPERR when the value does not fit or is Inf/NaN (unless SNAN is
	| already pending), and UNFL is cleared again for denormal input.
	| Every path ends in rts.
1135	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
1136	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
1137	printx	PCONV,%a0@
1138	printf	PCONV,") "
1139	addq.l	#2,%a0			| step to the exponent word
1140	move.w	(%a0)+,%d2		| exponent
1141	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
1142	cmp.w	#0x7fff,%d2
1143	jeq	fp_e2i_huge\b		| Inf / NaN
1144	sub.w	#0x3ffe,%d2		| %d2 = number of integer bits
1145	jcs	fp_e2i_small\b		| exponent < 0x3ffe: no integer bits
1146	cmp.w	#\b,%d2
1147	jhi	fp_e2i_large\b		| more integer bits than the target holds
1148	move.l	(%a0),%d0		| high mantissa lword
1149	move.l	%d0,%d1
1150	lsl.l	%d2,%d1			| %d1 = bits below the integer part
1151	jne	fp_e2i_round\b
1152	tst.l	(4,%a0)			| low mantissa lword
1153	jne	fp_e2i_round\b
1154	neg.w	%d2
1155	add.w	#32,%d2			| %d2 = 32 - integer bit count
1156	lsr.l	%d2,%d0			| exact case: %d0 = integer magnitude
	| common exit: apply the sign and check the range
11579:	tst.w	(-4,%a0)		| word that holds the sign byte
1158	jne	1f
1159	tst.\s	%d0
1160	jmi	fp_e2i_large\b		| positive value reached the sign bit
1161	printf	PCONV,"-> %p\n",1,%d0
1162	rts
11631:	neg.\s	%d0			| negative input: negate the magnitude
1164	jeq	1f
1165	jpl	fp_e2i_large\b		| still positive after negation: overflow
11661:	printf	PCONV,"-> %p\n",1,%d0
1167	rts
1168fp_e2i_round\b:
1169	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1170	neg.w	%d2
1171	add.w	#32,%d2			| %d2 = 32 - integer bit count
1172	.if	\b>16
1173	jeq	5f			| shift of 0: guard bit is in the low lword
1174	.endif
1175	lsr.l	%d2,%d0			| %d0 = truncated integer magnitude
1176	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1177	jne	2f			| %d2 == 0, round to nearest
1178	tst.l	%d1			| test guard bit
1179	jpl	9b			| zero is closer
1180	btst	%d2,%d0			| test lsb bit (%d2 still 0)
1181	jne	fp_e2i_doroundup\b	| odd: a tie or more rounds up
1182	lsl.l	#1,%d1			| check low bits
1183	jne	fp_e2i_doroundup\b
1184	tst.l	(4,%a0)			| remaining bits in the low lword
1185	jeq	9b			| exact tie with even lsb: keep
1186fp_e2i_doroundup\b:
1187	addq.l	#1,%d0			| round the magnitude away from zero
1188	jra	9b
1189	| check nondefault rounding modes
11902:	subq.w	#2,%d2
1191	jcs	9b			| %d2 < 2, round to zero
1192	jhi	3f			| %d2 > 2, round to +infinity
1193	tst.w	(-4,%a0)		| to -inf
1194	jne	fp_e2i_doroundup\b	| negative, round to infinity
1195	jra	9b			| positive, round to zero
11963:	tst.w	(-4,%a0)		| to +inf
1197	jeq	fp_e2i_doroundup\b	| positive, round to infinity
1198	jra	9b	| negative, round to zero
1199	| only a result that rounds to -2**31 has to come out right here,
1200	| since the guard bit is in the lower lword.
1201	| everything else ends up as overflow anyway.
1202	.if	\b>16
12035:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1204	jne	2b			| %d2 == 0, round to nearest
1205	move.l	(4,%a0),%d1		| test guard bit
1206	jpl	9b			| zero is closer
1207	lsl.l	#1,%d1			| check low bits
1208	jne	fp_e2i_doroundup\b
1209	jra	9b
1210	.endif
1211fp_e2i_zero\b:
1212	clr.l	%d0
1213	tst.l	(%a0)+			| high mantissa lword
1214	jne	1f
1215	tst.l	(%a0)			| low mantissa lword
1216	jeq	3f			| mantissa all zero: result is exactly 0
12171:	subq.l	#4,%a0			| back to the high mantissa lword
1218	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
1219fp_e2i_small\b:
1220	fp_set_sr FPSR_EXC_INEX2
1221	clr.l	%d0
1222	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1223	subq.w	#2,%d2
1224	jcs	3f			| %d2 < 2, round to nearest/zero
1225	jhi	2f			| %d2 > 2, round to +infinity
1226	tst.w	(-4,%a0)		| to -inf
1227	jeq	3f			| positive: stays 0
1228	subq.\s	#1,%d0			| negative: result is -1
1229	jra	3f
12302:	tst.w	(-4,%a0)		| to +inf
1231	jne	3f			| negative: stays 0
1232	addq.\s	#1,%d0			| positive: result is 1
12333:	printf	PCONV,"-> %p\n",1,%d0
1234	rts
1235fp_e2i_large\b:
1236	fp_set_sr FPSR_EXC_OPERR
1237	move.\s	#inf,%d0		| MAXINT for a positive overflow
1238	tst.w	(-4,%a0)
1239	jeq	1f
1240	addq.\s	#1,%d0			| negative: MAXINT + 1 wraps to MININT
12411:	printf	PCONV,"-> %p\n",1,%d0
1242	rts
1243fp_e2i_huge\b:
1244	move.\s	(%a0),%d0		| NaN result: leading mantissa bits
1245	tst.l	(%a0)			| high mantissa lword
1246	jne	1f
	| the low lword must be checked too, otherwise a NaN whose payload
	| sits only in the low 32 bits would be taken for an infinity
1247	tst.l	(4,%a0)			| low mantissa lword
1248	jeq	fp_e2i_large\b		| mantissa all zero: infinity
1249	| fp_normalize_ext has set this bit already
1250	| and made the number nonsignaling
12511:	fp_tst_sr FPSR_EXC_SNAN
1252	jne	1f
1253	fp_set_sr FPSR_EXC_OPERR
12541:	printf	PCONV,"-> %p\n",1,%d0
1255	rts
1256.endm
1257
1258fp_conv_ext2long:
	| Instantiates conv_ext2int for size l with 32 bits; per the macro
	| header the source is at %a0 and the integer is returned in %d0.
1259	conv_ext2int l,32
1260
1261fp_conv_ext2short:
	| Instantiates conv_ext2int for size w with 16 bits; per the macro
	| header the source is at %a0 and the integer is returned in %d0.
1262	conv_ext2int w,16
1263
1264fp_conv_ext2byte:
	| Instantiates conv_ext2int for size b with 8 bits; per the macro
	| header the source is at %a0 and the integer is returned in %d0.
1265	conv_ext2int b,8
1266
1267fp_conv_ext2double:
	| Packs the extended value at %a0 into the 64-bit double layout and
	| writes the two result lwords through %a1 with putuser.
	| fp_normalize_double is called first (presumably it rounds the
	| value to double precision - confirm against its definition).
	| Uses %d0-%d2 as scratch; %a0 is advanced to the low mantissa lword
	| and %a1 is left pointing at the second result lword.
	| NOTE(review): fp_err_ua2 looks like the fault handler passed to
	| putuser/getuser - confirm in fp_emu.h.
1268	jsr	fp_normalize_double
1269	printf	PCONV,"e2d: %p(",1,%a0
1270	printx	PCONV,%a0@
1271	printf	PCONV,"), "
1272	move.l	(%a0)+,%d2		| first lword, exponent in the low word
1273	cmp.w	#0x7fff,%d2
1274	jne	1f
1275	move.w	#0x7ff,%d2		| maximum exponent maps to all-ones (11 bits)
1276	move.l	(%a0)+,%d0		| high mantissa lword
1277	jra	2f
12781:	sub.w	#0x3fff-0x3ff,%d2	| rebias the exponent
1279	move.l	(%a0)+,%d0		| high mantissa lword
1280	jmi	2f			| top mantissa bit set: keep the exponent
1281	clr.w	%d2			| top bit clear: exponent field becomes 0
12822:	lsl.w	#5,%d2			| 11-bit exponent to bits 15-5
1283	lsl.l	#7,%d2
1284	lsl.l	#8,%d2			| 15 more: bit 16 -> bit 31, exponent -> bits 30-20
1285	move.l	%d0,%d1			| keep a copy for the second lword
1286	lsl.l	#1,%d0			| drop the top (integer) mantissa bit
1287	lsr.l	#4,%d0
1288	lsr.l	#8,%d0			| next 20 mantissa bits in bits 19-0
1289	or.l	%d2,%d0
1290	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
1291	moveq	#21,%d0
1292	lsl.l	%d0,%d1			| low 11 bits of the high lword -> bits 31-21
1293	move.l	(%a0),%d0		| low mantissa lword
1294	lsr.l	#4,%d0
1295	lsr.l	#7,%d0			| its top 21 bits -> bits 20-0
1296	or.l	%d1,%d0
1297	putuser.l %d0,(%a1),fp_err_ua2,%a1
1298#ifdef FPU_EMU_DEBUG
	| read both lwords back for the debug trace
1299	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
1300	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
1301	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
1302#endif
1303	rts
1304
1305fp_conv_ext2single:
	| Packs the extended value at %a0 into the 32-bit single layout and
	| returns it in %d0 (nothing is stored to memory here).
	| fp_normalize_single is called first (presumably it rounds the
	| value to single precision - confirm against its definition).
	| Uses %d1 as scratch; %a0 is advanced to the low mantissa lword.
1306	jsr	fp_normalize_single
1307	printf	PCONV,"e2s: %p(",1,%a0
1308	printx	PCONV,%a0@
1309	printf	PCONV,"), "
1310	move.l	(%a0)+,%d1		| first lword, exponent in the low word
1311	cmp.w	#0x7fff,%d1
1312	jne	1f
1313	move.w	#0xff,%d1		| maximum exponent maps to all-ones (8 bits)
1314	move.l	(%a0)+,%d0		| high mantissa lword
1315	jra	2f
13161:	sub.w	#0x3fff-0x7f,%d1	| rebias the exponent
1317	move.l	(%a0)+,%d0		| high mantissa lword
1318	jmi	2f			| top mantissa bit set: keep the exponent
1319	clr.w	%d1			| top bit clear: exponent field becomes 0
13202:	lsl.w	#8,%d1			| 8-bit exponent to bits 15-8
1321	lsl.l	#7,%d1
1322	lsl.l	#8,%d1			| 15 more: bit 16 -> bit 31, exponent -> bits 30-23
1323	bclr	#31,%d0			| drop the top (integer) mantissa bit
1324	lsr.l	#8,%d0			| next 23 mantissa bits in bits 22-0
1325	or.l	%d1,%d0
1326	printf	PCONV,"%08x\n",1,%d0
1327	rts
1328
1329	| special return addresses for instr that
1330	| encode the rounding precision in the opcode
1331	| (e.g. fsmove,fdmove)
1332
1333fp_finalrounding_single:
	| Drops 8 bytes from the stack, runs fp_normalize_ext followed by
	| fp_normalize_single, then continues at fp_finaltest.
	| NOTE(review): what the 8 discarded stack bytes hold is not visible
	| here - confirm against the callers.
1334	addq.l	#8,%sp
1335	jsr	fp_normalize_ext
1336	jsr	fp_normalize_single
1337	jra	fp_finaltest
1338
1339fp_finalrounding_single_fast:
	| Same sequence as fp_finalrounding_single, except that
	| fp_normalize_single_fast is called instead of fp_normalize_single.
	| NOTE(review): how the fast variant differs is not visible here -
	| confirm against its definition.
1340	addq.l	#8,%sp
1341	jsr	fp_normalize_ext
1342	jsr	fp_normalize_single_fast
1343	jra	fp_finaltest
1344
1345fp_finalrounding_double:
	| Drops 8 bytes from the stack, runs fp_normalize_ext followed by
	| fp_normalize_double, then continues at fp_finaltest.
	| NOTE(review): what the 8 discarded stack bytes hold is not visible
	| here - confirm against the callers.
1346	addq.l	#8,%sp
1347	jsr	fp_normalize_ext
1348	jsr	fp_normalize_double
1349	jra	fp_finaltest
1350
1351	| fp_finalrounding / fp_finaltest:
1352	| normalize the result as selected by FPD_PREC, then set the emulated
1353	| status register based on the outcome of an emulated instruction.
1354
1355fp_finalrounding:
	| Drops 8 bytes from the stack, runs fp_normalize_ext, then picks a
	| further normalization step from the word at FPD_PREC:
	|   0 -> none, 1 -> fp_normalize_single, 2 or more -> fp_normalize_double.
	| Execution then falls through into fp_finaltest.
	| NOTE(review): what the 8 discarded stack bytes hold is not visible
	| here - confirm against the callers.
1356	addq.l	#8,%sp
1357|	printf	,"f: %p\n",1,%a0
1358	jsr	fp_normalize_ext
1359	move.w	(FPD_PREC,FPDATA),%d0	| precision selector
1360	subq.w	#1,%d0
1361	jcs	fp_finaltest		| selector was 0: nothing more to do
1362	jne	1f			| selector was 2 or more
1363	jsr	fp_normalize_single	| selector was 1
1364	jra	2f
13651:	jsr	fp_normalize_double
13662:|	printf	,"f: %p\n",1,%a0
1367fp_finaltest:
	| Derives the condition code byte from the value at %a0 and stores
	| it at FPD_FPSR+0.  %d0 collects the result, %d1 holds the bit
	| number to set and %d2 is scratch; %a0 is advanced through the
	| value.  Execution then falls through into fp_final.
1368	| First, we do some of the obvious tests for the exception
1369	| status byte and condition code bytes of fp_sr here, so that
1370	| they do not have to be handled individually by every
1371	| emulated instruction.
1372	clr.l	%d0			| start with no condition bits set
1373	addq.l	#1,%a0			| skip the first byte of the value
1374	tst.b	(%a0)+			| sign
1375	jeq	1f
1376	bset	#FPSR_CC_NEG-24,%d0	| N bit
13771:	cmp.w	#0x7fff,(%a0)+		| exponent
1378	jeq	2f
1379	| test for zero
1380	moveq	#FPSR_CC_Z-24,%d1
1381	tst.l	(%a0)+			| high mantissa lword
1382	jne	9f
1383	tst.l	(%a0)			| low mantissa lword
1384	jne	9f
1385	jra	8f			| mantissa all zero: set Z
1386	| infinity and NaN
13872:	moveq	#FPSR_CC_NAN-24,%d1
1388	move.l	(%a0)+,%d2		| high mantissa lword
1389	lsl.l	#1,%d2			| ignore high bit
1390	jne	8f			| nonzero mantissa: NaN
1391	tst.l	(%a0)			| low mantissa lword
1392	jne	8f			| nonzero mantissa: NaN
1393	moveq	#FPSR_CC_INF-24,%d1
13948:	bset	%d1,%d0
13959:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
1396	| move instructions enter here
1397	| Here, we test things in the exception status byte, and set
1398	| other things in the accrued exception byte accordingly.
1399	| Emulated instructions can set various things in the former,
1400	| as defined in fp_emu.h.
1401fp_final:
	| Folds bits 8-14 of the status lword at FPD_FPSR into bits 7-3 of
	| its low byte (OR-ed onto whatever is already there), writes the
	| lword back, then tests the byte at FPD_FPSR+2 against the byte at
	| FPD_FPCR+2 and jumps to fp_end.
	| With s[n] = bit n of the original lword, the bits OR-ed in are:
	|   bit 7 = s[13] | s[14]
	|   bit 6 = s[12]
	|   bit 5 = s[11] & s[9]
	|   bit 4 = s[10]
	|   bit 3 = s[9] | s[8] | s[12]
	| NOTE(review): presumably bits 15-8 are the exception status byte
	| and bits 7-3 the accrued exception byte - confirm against the
	| FPSR_* definitions in fp_emu.h.
	| Uses %d0-%d2 as scratch.
1402	move.l	(FPD_FPSR,FPDATA),%d0
1403	| bit-parallel computation, greatly optimized, but untested (yet)
1404	move.l	%d0,%d2			| %d2 = original status lword
1405	lsr.l	#5,%d0
1406	move.l	%d0,%d1
1407	lsr.l	#4,%d1
1408	or.l	%d0,%d1
1409	and.b	#0x08,%d1		| low byte: bit 3 = s[8] | s[12]
1410	move.l	%d2,%d0
1411	lsr.l	#6,%d0			| s[13..9] land in bits 7-3
1412	or.l	%d1,%d0
1413	move.l	%d2,%d1
1414	lsr.l	#4,%d1
1415	or.b	#0xdf,%d1		| all ones except bit 5 = s[9]
1416	and.b	%d1,%d0			| bit 5 becomes s[11] & s[9]
1417	move.l	%d2,%d1
1418	lsr.l	#7,%d1
1419	and.b	#0x80,%d1		| bit 7 = s[14]
1420	or.b	%d1,%d0
1421	and.b	#0xf8,%d0		| keep only bits 7-3
1422	or.b	%d0,%d2			| merge into the low byte of the lword
1423	move.l	%d2,(FPD_FPSR,FPDATA)
1424	move.b	(FPD_FPSR+2,FPDATA),%d0	| status byte at offset 2
1425	and.b	(FPD_FPCR+2,FPDATA),%d0	| masked with the control byte at offset 2
1426	jeq	1f
	| a status bit is also set in the control byte; only a message is
	| printed here
1427	printf	,"send signal!!!\n"
14281:	jra	fp_end
1429