1#
2# $NetBSD: fpsp.s,v 1.3.2.3 2004/09/21 13:17:25 skrll Exp $
3#
4
5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7# M68000 Hi-Performance Microprocessor Division
8# M68060 Software Package Production Release
9#
10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11# All rights reserved.
12#
13# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14# To the maximum extent permitted by applicable law,
15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17# FOR A PARTICULAR PURPOSE and any warranty against infringement with
18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19# and any accompanying written materials.
20#
21# To the maximum extent permitted by applicable law,
22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
26#
27# Motorola assumes no responsibility for the maintenance and support
28# of the SOFTWARE.
29#
30# You are hereby granted a copyright license to use, modify, and distribute the
31# SOFTWARE so long as this entire notice is retained without alteration
32# in any modified and/or redistributed versions, and that such modified
33# versions are clearly identified as such.
34# No licenses are granted by implication, estoppel or otherwise under any
35# patents or trademarks of Motorola, Inc.
36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37
38#
39# freal.s:
40#	This file is appended to the top of the 060FPSP package
41# and contains the entry points into the package. The user, in
42# effect, branches to one of the branch table entries located
43# after _060FPSP_TABLE.
44#	Also, subroutine stubs exist in this file (_fpsp_done for
45# example) that are referenced by the FPSP package itself in order
46# to call a given routine. The stub routine actually performs the
47# callout. The FPSP code does a "bsr" to the stub routine. This
48# extra layer of hierarchy adds a slight performance penalty but
49# it makes the FPSP code easier to read and more maintainable.
50#
51
# Byte offsets of the callout slots read by the stub routines in this file.
# Each stub loads the longword at _060FPSP_TABLE-0x80+<offset> (PC-relative)
# and uses it as a displacement from _060FPSP_TABLE-0x80.
# 0x00-0x30: exception and exit callouts (_real_*, _fpsp_done).
52set	_off_bsun,	0x00		# slot read by _real_bsun
53set	_off_snan,	0x04		# slot read by _real_snan
54set	_off_operr,	0x08		# slot read by _real_operr
55set	_off_ovfl,	0x0c		# slot read by _real_ovfl
56set	_off_unfl,	0x10		# slot read by _real_unfl
57set	_off_dz,	0x14		# slot read by _real_dz
58set	_off_inex,	0x18		# slot read by _real_inex
59set	_off_fline,	0x1c		# slot read by _real_fline
60set	_off_fpu_dis,	0x20		# slot read by _real_fpu_disabled
61set	_off_trap,	0x24		# slot read by _real_trap
62set	_off_trace,	0x28		# slot read by _real_trace
63set	_off_access,	0x2c		# slot read by _real_access
64set	_off_done,	0x30		# slot read by _fpsp_done
65
# 0x40-0x68: instruction/data memory access callouts (_imem_*, _dmem_*).
66set	_off_imr,	0x40		# slot read by _imem_read
67set	_off_dmr,	0x44		# slot read by _dmem_read
68set	_off_dmw,	0x48		# slot read by _dmem_write
69set	_off_irw,	0x4c		# slot read by _imem_read_word
70set	_off_irl,	0x50		# slot read by _imem_read_long
71set	_off_drb,	0x54		# slot read by _dmem_read_byte
72set	_off_drw,	0x58		# slot read by _dmem_read_word
73set	_off_drl,	0x5c		# slot read by _dmem_read_long
74set	_off_dwb,	0x60		# slot read by _dmem_write_byte
75set	_off_dww,	0x64		# slot read by _dmem_write_word
76set	_off_dwl,	0x68		# slot read by _dmem_write_long
77
# _060FPSP_TABLE: base label of the package and start of the entry-point
# branch table. Each entry is a "bra.l" to one handler followed by a zero
# short. NOTE(review): a 68020+ bra.l is 6 bytes, so the short presumably
# pads each entry to 8 bytes - confirm against the assembler output.
# The stubs in this file address the callout slots at _060FPSP_TABLE-0x80.
78_060FPSP_TABLE:
79
80###############################################################
81
82# Here's the table of ENTRY POINTS for those linking the package.
83	bra.l		_fpsp_snan		# entry 0: signalling NAN
84	short		0x0000
85	bra.l		_fpsp_operr		# entry 1: operand error
86	short		0x0000
87	bra.l		_fpsp_ovfl		# entry 2: overflow
88	short		0x0000
89	bra.l		_fpsp_unfl		# entry 3: underflow
90	short		0x0000
91	bra.l		_fpsp_dz		# entry 4: divide by zero
92	short		0x0000
93	bra.l		_fpsp_inex		# entry 5: inexact
94	short		0x0000
95	bra.l		_fpsp_fline		# entry 6: F-line
96	short		0x0000
97	bra.l		_fpsp_unsupp		# entry 7: (handler not in this section)
98	short		0x0000
99	bra.l		_fpsp_effadd		# entry 8: (handler not in this section)
100	short		0x0000
101
# NOTE(review): if "space" reserves bytes, 9 entries * 8 + 56 = 128 (0x80)
# bytes, i.e. the table area would end at _060FPSP_TABLE+0x80 - confirm.
102	space 		56
103
104###############################################################
# _fpsp_done(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_done; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
105	global		_fpsp_done
106_fpsp_done:
107	mov.l		%d0,-(%sp)		# save d0; used as scratch below
108	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = stored offset
109	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
110	mov.l		0x4(%sp),%d0		# reload the saved d0
111	rtd		&0x4			# pop callout addr into pc; drop d0 slot
112
# _real_ovfl(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_ovfl; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
113	global		_real_ovfl
114_real_ovfl:
115	mov.l		%d0,-(%sp)		# save d0; used as scratch below
116	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = stored offset
117	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
118	mov.l		0x4(%sp),%d0		# reload the saved d0
119	rtd		&0x4			# pop callout addr into pc; drop d0 slot
120
# _real_unfl(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_unfl; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
121	global		_real_unfl
122_real_unfl:
123	mov.l		%d0,-(%sp)		# save d0; used as scratch below
124	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = stored offset
125	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
126	mov.l		0x4(%sp),%d0		# reload the saved d0
127	rtd		&0x4			# pop callout addr into pc; drop d0 slot
128
# _real_inex(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_inex; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
129	global		_real_inex
130_real_inex:
131	mov.l		%d0,-(%sp)		# save d0; used as scratch below
132	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = stored offset
133	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
134	mov.l		0x4(%sp),%d0		# reload the saved d0
135	rtd		&0x4			# pop callout addr into pc; drop d0 slot
136
# _real_bsun(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_bsun; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
137	global		_real_bsun
138_real_bsun:
139	mov.l		%d0,-(%sp)		# save d0; used as scratch below
140	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = stored offset
141	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
142	mov.l		0x4(%sp),%d0		# reload the saved d0
143	rtd		&0x4			# pop callout addr into pc; drop d0 slot
144
# _real_operr(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_operr; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
145	global		_real_operr
146_real_operr:
147	mov.l		%d0,-(%sp)		# save d0; used as scratch below
148	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = stored offset
149	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
150	mov.l		0x4(%sp),%d0		# reload the saved d0
151	rtd		&0x4			# pop callout addr into pc; drop d0 slot
152
# _real_snan(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_snan; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
153	global		_real_snan
154_real_snan:
155	mov.l		%d0,-(%sp)		# save d0; used as scratch below
156	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = stored offset
157	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
158	mov.l		0x4(%sp),%d0		# reload the saved d0
159	rtd		&0x4			# pop callout addr into pc; drop d0 slot
160
# _real_dz(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_dz; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
161	global		_real_dz
162_real_dz:
163	mov.l		%d0,-(%sp)		# save d0; used as scratch below
164	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = stored offset
165	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
166	mov.l		0x4(%sp),%d0		# reload the saved d0
167	rtd		&0x4			# pop callout addr into pc; drop d0 slot
168
# _real_fline(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_fline; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
169	global		_real_fline
170_real_fline:
171	mov.l		%d0,-(%sp)		# save d0; used as scratch below
172	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = stored offset
173	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
174	mov.l		0x4(%sp),%d0		# reload the saved d0
175	rtd		&0x4			# pop callout addr into pc; drop d0 slot
176
# _real_fpu_disabled(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_fpu_dis; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub.
177	global		_real_fpu_disabled
178_real_fpu_disabled:
179	mov.l		%d0,-(%sp)		# save d0; used as scratch below
180	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = stored offset
181	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
182	mov.l		0x4(%sp),%d0		# reload the saved d0
183	rtd		&0x4			# pop callout addr into pc; drop d0 slot
184
# _real_trap(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_trap; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
185	global		_real_trap
186_real_trap:
187	mov.l		%d0,-(%sp)		# save d0; used as scratch below
188	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = stored offset
189	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
190	mov.l		0x4(%sp),%d0		# reload the saved d0
191	rtd		&0x4			# pop callout addr into pc; drop d0 slot
192
# _real_trace(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_trace; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
193	global		_real_trace
194_real_trace:
195	mov.l		%d0,-(%sp)		# save d0; used as scratch below
196	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = stored offset
197	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
198	mov.l		0x4(%sp),%d0		# reload the saved d0
199	rtd		&0x4			# pop callout addr into pc; drop d0 slot
200
# _real_access(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_access; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub.
201	global		_real_access
202_real_access:
203	mov.l		%d0,-(%sp)		# save d0; used as scratch below
204	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = stored offset
205	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
206	mov.l		0x4(%sp),%d0		# reload the saved d0
207	rtd		&0x4			# pop callout addr into pc; drop d0 slot
208
209#######################################
210
# _imem_read(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_imr; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub, so a "bsr" to this stub makes
# the callout return directly to the original caller.
211	global		_imem_read
212_imem_read:
213	mov.l		%d0,-(%sp)		# save d0; used as scratch below
214	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = stored offset
215	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
216	mov.l		0x4(%sp),%d0		# reload the saved d0
217	rtd		&0x4			# pop callout addr into pc; drop d0 slot
218
# _dmem_read(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_dmr; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub, so a "bsr" to this stub makes
# the callout return directly to the original caller.
219	global		_dmem_read
220_dmem_read:
221	mov.l		%d0,-(%sp)		# save d0; used as scratch below
222	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = stored offset
223	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
224	mov.l		0x4(%sp),%d0		# reload the saved d0
225	rtd		&0x4			# pop callout addr into pc; drop d0 slot
226
# _dmem_write(): callout stub. Transfers control to the routine whose longword
# offset is stored at _060FPSP_TABLE-0x80+_off_dmw; the offset is applied
# relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival at the
# callout as they were on entry to this stub, so a "bsr" to this stub makes
# the callout return directly to the original caller.
227	global		_dmem_write
228_dmem_write:
229	mov.l		%d0,-(%sp)		# save d0; used as scratch below
230	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = stored offset
231	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
232	mov.l		0x4(%sp),%d0		# reload the saved d0
233	rtd		&0x4			# pop callout addr into pc; drop d0 slot
234
# _imem_read_word(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_irw; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
235	global		_imem_read_word
236_imem_read_word:
237	mov.l		%d0,-(%sp)		# save d0; used as scratch below
238	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = stored offset
239	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
240	mov.l		0x4(%sp),%d0		# reload the saved d0
241	rtd		&0x4			# pop callout addr into pc; drop d0 slot
242
# _imem_read_long(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_irl; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
243	global		_imem_read_long
244_imem_read_long:
245	mov.l		%d0,-(%sp)		# save d0; used as scratch below
246	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = stored offset
247	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
248	mov.l		0x4(%sp),%d0		# reload the saved d0
249	rtd		&0x4			# pop callout addr into pc; drop d0 slot
250
# _dmem_read_byte(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_drb; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
251	global		_dmem_read_byte
252_dmem_read_byte:
253	mov.l		%d0,-(%sp)		# save d0; used as scratch below
254	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = stored offset
255	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
256	mov.l		0x4(%sp),%d0		# reload the saved d0
257	rtd		&0x4			# pop callout addr into pc; drop d0 slot
258
# _dmem_read_word(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_drw; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
259	global		_dmem_read_word
260_dmem_read_word:
261	mov.l		%d0,-(%sp)		# save d0; used as scratch below
262	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = stored offset
263	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
264	mov.l		0x4(%sp),%d0		# reload the saved d0
265	rtd		&0x4			# pop callout addr into pc; drop d0 slot
266
# _dmem_read_long(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_drl; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
267	global		_dmem_read_long
268_dmem_read_long:
269	mov.l		%d0,-(%sp)		# save d0; used as scratch below
270	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = stored offset
271	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
272	mov.l		0x4(%sp),%d0		# reload the saved d0
273	rtd		&0x4			# pop callout addr into pc; drop d0 slot
274
# _dmem_write_byte(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_dwb; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
275	global		_dmem_write_byte
276_dmem_write_byte:
277	mov.l		%d0,-(%sp)		# save d0; used as scratch below
278	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = stored offset
279	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
280	mov.l		0x4(%sp),%d0		# reload the saved d0
281	rtd		&0x4			# pop callout addr into pc; drop d0 slot
282
# _dmem_write_word(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_dww; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
283	global		_dmem_write_word
284_dmem_write_word:
285	mov.l		%d0,-(%sp)		# save d0; used as scratch below
286	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = stored offset
287	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
288	mov.l		0x4(%sp),%d0		# reload the saved d0
289	rtd		&0x4			# pop callout addr into pc; drop d0 slot
290
# _dmem_write_long(): callout stub. Transfers control to the routine whose
# longword offset is stored at _060FPSP_TABLE-0x80+_off_dwl; the offset is
# applied relative to _060FPSP_TABLE-0x80. d0 and %sp are the same on arrival
# at the callout as they were on entry to this stub, so a "bsr" to this stub
# makes the callout return directly to the original caller.
291	global		_dmem_write_long
292_dmem_write_long:
293	mov.l		%d0,-(%sp)		# save d0; used as scratch below
294	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = stored offset
295	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
296	mov.l		0x4(%sp),%d0		# reload the saved d0
297	rtd		&0x4			# pop callout addr into pc; drop d0 slot
298
299#
300# This file contains a set of define statements for constants
301# in order to promote readability within the corecode itself.
302#
303
# Stack frame layout. The handlers set up the frame with
# "link.w %a6,&-LOCAL_SIZE", so every offset below is a displacement from %a6:
# positive offsets reach the exception stack frame above the saved frame
# pointer, negative offsets reach the LOCAL_SIZE bytes of locals below it.
304set LOCAL_SIZE,		192			# stack frame size(bytes)
305set LV,			-LOCAL_SIZE		# stack offset
306
307set EXC_SR,		0x4			# stack status register
308set EXC_PC,		0x6			# stack pc
309set EXC_VOFF,		0xa			# stacked vector offset
310set EXC_EA,		0xc			# stacked <ea>
311
312set EXC_FP,		0x0			# frame pointer
313
# Register save areas: fp regs occupy -36..-1 (3 * 12 bytes), address regs
# -68..-37 (8 * 4 bytes), data regs -100..-69 (8 * 4 bytes).
314set EXC_AREGS,		-68			# offset of all address regs
315set EXC_DREGS,		-100			# offset of all data regs
316set EXC_FPREGS,		-36			# offset of all fp regs
317
318set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
# NOTE: OLD_A7 and EXC_A6 resolve to the same offset (EXC_AREGS+24).
319set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
320set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
321set EXC_A5,		EXC_AREGS+(5*4)
322set EXC_A4,		EXC_AREGS+(4*4)
323set EXC_A3,		EXC_AREGS+(3*4)
324set EXC_A2,		EXC_AREGS+(2*4)
325set EXC_A1,		EXC_AREGS+(1*4)
326set EXC_A0,		EXC_AREGS+(0*4)
327set EXC_D7,		EXC_DREGS+(7*4)
328set EXC_D6,		EXC_DREGS+(6*4)
329set EXC_D5,		EXC_DREGS+(5*4)
330set EXC_D4,		EXC_DREGS+(4*4)
331set EXC_D3,		EXC_DREGS+(3*4)
332set EXC_D2,		EXC_DREGS+(2*4)
333set EXC_D1,		EXC_DREGS+(1*4)
334set EXC_D0,		EXC_DREGS+(0*4)
335
336set EXC_FP0, 		EXC_FPREGS+(0*12)	# offset of saved fp0
337set EXC_FP1, 		EXC_FPREGS+(1*12)	# offset of saved fp1
338set EXC_FP2, 		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
339
# Local variable offsets, expressed relative to LV (the lowest address of the
# frame). The four 12-byte operand slots run from LV+44 (FP_SRC) to LV+91
# (end of FP_SCR1); LV+92 equals -100, where the data register save area starts.
340set FP_SCR1, 		LV+80			# fp scratch 1
341set FP_SCR1_EX, 	FP_SCR1+0
342set FP_SCR1_SGN,	FP_SCR1+2
343set FP_SCR1_HI, 	FP_SCR1+4
344set FP_SCR1_LO, 	FP_SCR1+8
345
346set FP_SCR0, 		LV+68			# fp scratch 0
347set FP_SCR0_EX, 	FP_SCR0+0
348set FP_SCR0_SGN,	FP_SCR0+2
349set FP_SCR0_HI, 	FP_SCR0+4
350set FP_SCR0_LO, 	FP_SCR0+8
351
352set FP_DST, 		LV+56			# fp destination operand
353set FP_DST_EX, 		FP_DST+0
354set FP_DST_SGN,		FP_DST+2
355set FP_DST_HI, 		FP_DST+4
356set FP_DST_LO, 		FP_DST+8
357
358set FP_SRC, 		LV+44			# fp source operand
359set FP_SRC_EX, 		FP_SRC+0
360set FP_SRC_SGN,		FP_SRC+2
361set FP_SRC_HI, 		FP_SRC+4
362set FP_SRC_LO, 		FP_SRC+8
363
364set USER_FPIAR,		LV+40			# FP instr address register
365
366set USER_FPSR,		LV+36			# FP status register
367set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
368set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
369set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
370set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
371
372set USER_FPCR,		LV+32			# FP control register
373set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
374set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
375
376set L_SCR3,		LV+28			# integer scratch 3
377set L_SCR2,		LV+24			# integer scratch 2
378set L_SCR1,		LV+20			# integer scratch 1
379
380set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
381
# NOTE: EXC_TEMP2 shares its offset (LV+24) with L_SCR2.
382set EXC_TEMP2,		LV+24			# temporary space
383set EXC_TEMP,		LV+16			# temporary space
384
385set DTAG,		LV+15			# destination operand type
386set STAG, 		LV+14			# source operand type
387
388set SPCOND_FLG,		LV+10			# flag: special case (see below)
389
390set EXC_CC,		LV+8			# saved condition codes
391set EXC_EXTWPTR,	LV+4			# saved current PC (active)
# EXC_EXTWORD and EXC_CMDREG are two names for the same word at LV+2; a
# longword store to EXC_OPWORD therefore fills both the opword and this word.
392set EXC_EXTWORD,	LV+2			# saved extension word
393set EXC_CMDREG,		LV+2			# saved extension word
394set EXC_OPWORD,		LV+0			# saved operation word
395
396################################
397
398# Helpful macros
399
# Field offsets within an in-memory extended precision operand. The FTEMP_*,
# LOCAL_*, DST_* and SRC_* families use the same values for the same fields
# (EX=0, SGN=2, HI=4, LO=8, GRS=12 where defined); they differ only in name.
400set FTEMP,		0			# offsets within an
401set FTEMP_EX, 		0			# extended precision
402set FTEMP_SGN,		2			# value saved in memory.
403set FTEMP_HI, 		4
404set FTEMP_LO, 		8
405set FTEMP_GRS,		12
406
407set LOCAL,		0			# offsets within an
408set LOCAL_EX, 		0			# extended precision
409set LOCAL_SGN,		2			# value saved in memory.
410set LOCAL_HI, 		4
411set LOCAL_LO, 		8
412set LOCAL_GRS,		12
413
414set DST,		0			# offsets within an
415set DST_EX,		0			# extended precision
416set DST_HI,		4			# value saved in memory.
417set DST_LO,		8
418
419set SRC,		0			# offsets within an
420set SRC_EX,		0			# extended precision
421set SRC_HI,		4			# value saved in memory.
422set SRC_LO,		8
423
# Exponent limits per precision.
# NOTE(review): the values look like they are expressed in the extended
# precision bias (0x3fff), e.g. SGL_LO = 0x3fff-0x7e - confirm.
424set SGL_LO,		0x3f81			# min sgl prec exponent
425set SGL_HI,		0x407e			# max sgl prec exponent
426set DBL_LO,		0x3c01			# min dbl prec exponent
427set DBL_HI,		0x43fe			# max dbl prec exponent
428set EXT_LO,		0x0			# min ext prec exponent
429set EXT_HI,		0x7ffe			# max ext prec exponent
430
431set EXT_BIAS,		0x3fff			# extended precision bias
432set SGL_BIAS,		0x007f			# single precision bias
433set DBL_BIAS,		0x03ff			# double precision bias
434
# Operand type tags; these are the byte values stored in STAG/DTAG.
435set NORM,		0x00			# operand type for STAG/DTAG
436set ZERO,		0x01			# operand type for STAG/DTAG
437set INF,		0x02			# operand type for STAG/DTAG
438set QNAN,		0x03			# operand type for STAG/DTAG
439set DENORM,		0x04			# operand type for STAG/DTAG
440set SNAN,		0x05			# operand type for STAG/DTAG
441set UNNORM,		0x06			# operand type for STAG/DTAG
442
443##################
444# FPSR/FPCR bits #
445##################
# Bit numbers for use with bit instructions on a single FPSR/FPCR byte (the
# handlers use them as "btst &ovfl_bit,FPCR_ENABLE(%a6)"). Each number is the
# position inside its byte, not inside the 32-bit register; the longword
# masks defined for the same flags are the matching full-register form.
446set neg_bit,		0x3			# negative result
447set z_bit,		0x2			# zero result
448set inf_bit,		0x1			# infinite result
449set nan_bit,		0x0			# NAN result
450
451set q_sn_bit,		0x7			# sign bit of quotient byte
452
453set bsun_bit,		7			# branch on unordered
454set snan_bit,		6			# signalling NAN
455set operr_bit,		5			# operand error
456set ovfl_bit,		4			# overflow
457set unfl_bit,		3			# underflow
458set dz_bit,		2			# divide by zero
459set inex2_bit,		1			# inexact result 2
460set inex1_bit,		0			# inexact result 1
461
462set aiop_bit,		7			# accrued invalid operation bit
463set aovfl_bit,		6			# accrued overflow bit
464set aunfl_bit,		5			# accrued underflow bit
465set adz_bit,		4			# accrued dz bit
466set ainex_bit,		3			# accrued inexact bit
467
468#############################
469# FPSR individual bit masks #
470#############################
# Single-flag masks. The "(lw)" masks and the exception/accrued masks are
# positioned for the full 32-bit FPSR longword: condition codes in bits
# 27-24, exception status in bits 15-8, accrued exceptions in bits 7-3.
# The "(byte)" masks are the same condition codes positioned within one byte.
471set neg_mask,		0x08000000		# negative bit mask (lw)
472set inf_mask,		0x02000000		# infinity bit mask (lw)
473set z_mask,		0x04000000		# zero bit mask (lw)
474set nan_mask,		0x01000000		# nan bit mask (lw)
475
476set neg_bmask,		0x08			# negative bit mask (byte)
477set inf_bmask,		0x02			# infinity bit mask (byte)
478set z_bmask,		0x04			# zero bit mask (byte)
479set nan_bmask,		0x01			# nan bit mask (byte)
480
481set bsun_mask,		0x00008000		# bsun exception mask
482set snan_mask,		0x00004000		# snan exception mask
483set operr_mask,		0x00002000		# operr exception mask
484set ovfl_mask,		0x00001000		# overflow exception mask
485set unfl_mask,		0x00000800		# underflow exception mask
486set dz_mask,		0x00000400		# dz exception mask
487set inex2_mask,		0x00000200		# inex2 exception mask
488set inex1_mask,		0x00000100		# inex1 exception mask
489
490set aiop_mask,		0x00000080		# accrued illegal operation
491set aovfl_mask,		0x00000040		# accrued overflow
492set aunfl_mask,		0x00000020		# accrued underflow
493set adz_mask,		0x00000010		# accrued divide by zero
494set ainex_mask,		0x00000008		# accrued inexact
495
496######################################
497# FPSR combinations used in the FPSP #
498######################################
# Combined FPSR masks built by adding the single-flag masks. The component
# masks have no bits in common, so "+" acts as a bitwise OR. The resulting
# value is given in the comment on each line.
499set dzinf_mask,		inf_mask+dz_mask+adz_mask	# = 0x02000410
500set opnan_mask,		nan_mask+operr_mask+aiop_mask	# = 0x01002080
501set nzi_mask,		0x01ffffff 		#clears N, Z, and I
502set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask	# = 0x00000a28
503set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# = 0x00000a08
504set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask	# = 0x00001248
505set inx1a_mask,		inex1_mask+ainex_mask	# = 0x00000108
506set inx2a_mask,		inex2_mask+ainex_mask	# = 0x00000208
507set snaniop_mask, 	nan_mask+snan_mask+aiop_mask	# = 0x01004080
508set snaniop2_mask,	snan_mask+aiop_mask	# = 0x00004080
509set naniop_mask,	nan_mask+aiop_mask	# = 0x01000080
510set neginf_mask,	neg_mask+inf_mask	# = 0x0a000000
511set infaiop_mask, 	inf_mask+aiop_mask	# = 0x02000080
512set negz_mask,		neg_mask+z_mask	# = 0x0c000000
513set opaop_mask,		operr_mask+aiop_mask	# = 0x00002080
514set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# = 0x00000828
515set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# = 0x00001048
516
517#########
518# misc. #
519#########
# Miscellaneous constants: bit positions, precision thresholds, rounding
# precision/mode encodings, operand sizes and exception vector offsets.
520set rnd_stky_bit,	29			# stky bit pos in longword
521
522set sign_bit,		0x7			# sign bit
523set signan_bit,		0x6			# signalling nan bit
524
# These two repeat the values of SGL_LO and DBL_LO.
525set sgl_thresh,		0x3f81			# minimum sgl exponent
526set dbl_thresh,		0x3c01			# minimum dbl exponent
527
528set x_mode,		0x0			# extended precision
529set s_mode,		0x4			# single precision
530set d_mode,		0x8			# double precision
531
532set rn_mode,		0x0			# round-to-nearest
533set rz_mode,		0x1			# round-to-zero
534set rm_mode,		0x2			# round-to-minus-infinity
535set rp_mode,		0x3			# round-to-plus-infinity
536
537set mantissalen,	64			# length of mantissa in bits
538
539set BYTE,		1			# len(byte) == 1 byte
540set WORD, 		2			# len(word) == 2 bytes
541set LONG, 		4			# len(longword) == 4 bytes
542
# Vector offsets of the FP exceptions. _fpsp_ovfl writes 0xc4 (INEX_VEC's
# value) into the low byte of the stacked vector offset before calling
# _real_inex.
543set BSUN_VEC,		0xc0			# bsun    vector offset
544set INEX_VEC,		0xc4			# inexact vector offset
545set DZ_VEC,		0xc8			# dz      vector offset
546set UNFL_VEC,		0xcc			# unfl    vector offset
547set OPERR_VEC,		0xd0			# operr   vector offset
548set OVFL_VEC,		0xd4			# ovfl    vector offset
549set SNAN_VEC,		0xd8			# snan    vector offset
549set SNAN_VEC,		0xd8			# snan    vector offset
550
551###########################
552# SPecial CONDition FLaGs #
553###########################
# Special condition flags. The *_flg values are byte masks and the *_bit
# values are the matching bit numbers (mask == 1 << bit for each pair).
# fmovm_flg (0x40, bit 6) has no *_bit counterpart in this list.
# NOTE(review): presumably these are stored in SPCOND_FLG - confirm at the
# use sites, which are outside this section.
554set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
555set fbsun_flg,		0x02			# flag bit: bsun exception
556set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
557set mda7_flg,		0x08			# flag bit: -(a7) <ea>
558set fmovm_flg,		0x40			# flag bit: fmovm instruction
559set immed_flg,		0x80			# flag bit: &<data> <ea>
560
561set ftrapcc_bit,	0x0			# bit number for ftrapcc_flg
562set fbsun_bit,		0x1			# bit number for fbsun_flg
563set mia7_bit,		0x2			# bit number for mia7_flg
564set mda7_bit,		0x3			# bit number for mda7_flg
565set immed_bit,		0x7			# bit number for immed_flg
566
567##################################
568# TRANSCENDENTAL "LAST-OP" FLAGS #
569##################################
# Codes identifying which operation a transcendental routine performed last.
# NOTE(review): the consumers of these codes are outside this section.
570set FMUL_OP,		0x0			# fmul instr performed last
571set FDIV_OP,		0x1			# fdiv performed last
572set FADD_OP,		0x2			# fadd performed last
573set FMOV_OP,		0x3			# fmov performed last
574
575#############
576# CONSTANTS #
577#############
# Constant data emitted inline as longwords.
# NOTE(review): T1/T2 and TWOBYPI are two longwords each (they look like
# IEEE double precision bit patterns); PI and PIBY2 are four longwords each
# (they look like extended precision values padded to 16 bytes) - confirm
# against the routines that load them.
578T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
579T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
580
581PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
582PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
583
584TWOBYPI:
585	long		0x3FE45F30,0x6DC9C883
586
587#########################################################################
588# XDEF ****************************************************************	#
589#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
590#									#
591#	This handler should be the first code executed upon taking the	#
592#	FP Overflow exception in an operating system.			#
593#									#
594# XREF ****************************************************************	#
595#	_imem_read_long() - read instruction longword			#
596#	fix_skewed_ops() - adjust src operand in fsave frame		#
597#	set_tag_x() - determine optype of src/dst operands		#
598#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
599#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
600#	load_fpn2() - load dst operand from FP regfile			#
601#	fout() - emulate an opclass 3 instruction			#
602#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
603#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
604#	_real_ovfl() - "callout" for Overflow exception enabled code	#
605#	_real_inex() - "callout" for Inexact exception enabled code	#
606#	_real_trace() - "callout" for Trace exception code		#
607#									#
608# INPUT ***************************************************************	#
609#	- The system stack contains the FP Ovfl exception stack frame	#
610#	- The fsave frame contains the source operand			#
611# 									#
612# OUTPUT **************************************************************	#
613#	Overflow Exception enabled:					#
614#	- The system stack is unchanged					#
615#	- The fsave frame contains the adjusted src op for opclass 0,2	#
616#	Overflow Exception disabled:					#
617#	- The system stack is unchanged					#
618#	- The "exception present" flag in the fsave frame is cleared	#
619#									#
620# ALGORITHM ***********************************************************	#
621#	On the 060, if an FP overflow is present as the result of any	#
622# instruction, the 060 will take an overflow exception whether the 	#
623# exception is enabled or disabled in the FPCR. For the disabled case, 	#
624# This handler emulates the instruction to determine what the correct	#
625# default result should be for the operation. This default result is	#
626# then stored in either the FP regfile, data regfile, or memory. 	#
627# Finally, the handler exits through the "callout" _fpsp_done() 	#
628# denoting that no exceptional conditions exist within the machine.	#
629# 	If the exception is enabled, then this handler must create the	#
630# exceptional operand and place it in the fsave state frame, and store	#
631# the default result (only if the instruction is opclass 3). For 	#
632# exceptions enabled, this handler must exit through the "callout" 	#
633# _real_ovfl() so that the operating system enabled overflow handler	#
634# can handle this case.							#
635#	Two other conditions exist. First, if overflow was disabled 	#
636# but the inexact exception was enabled, this handler must exit 	#
637# through the "callout" _real_inex() regardless of whether the result	#
638# was inexact.								#
639#	Also, in the case of an opclass three instruction where 	#
640# overflow was disabled and the trace exception was enabled, this	#
641# handler must exit through the "callout" _real_trace().		#
642#									#
643#########################################################################
644
# _fpsp_ovfl(): FP Overflow exception handler.
# Flow: build a stack frame, fsave the FPU state into FP_SRC, save the working
# registers, fetch the faulting instruction through _imem_read_long, then
# either emulate an opclass 0/2 operation via tbl_unsupp or hand an fmove-out
# to fout(). The exit is chosen from FPCR_ENABLE: _real_ovfl if overflow is
# enabled, else _real_inex if inexact is enabled, else _fpsp_done (or
# _real_trace for an fmove-out when the trace bit of the stacked SR is set).
645	global		_fpsp_ovfl
646_fpsp_ovfl:
647
648#$#	sub.l		&24,%sp			# make room for src/dst
649
650	link.w		%a6,&-LOCAL_SIZE	# init stack frame
651
652	fsave		FP_SRC(%a6)		# grab the "busy" frame
653
654 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
655	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
656 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
657
658# the FPIAR holds the "current PC" of the faulting instruction
659	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
660	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
661	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
662	bsr.l		_imem_read_long		# fetch the instruction words
663	mov.l		%d0,EXC_OPWORD(%a6)	# long store: opword + extension word
664
665##############################################################################
666
667	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
668	bne.w		fovfl_out		# yes; emulated by fout() below
669
670
671	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
672	bsr.l		fix_skewed_ops		# fix src op
673
674# since, I believe, only NORMs and DENORMs can come through here,
675# maybe we can avoid the subroutine call.
676	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
677	bsr.l		set_tag_x		# tag the operand type
678	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
679
680# bit five of the fp extension word separates the monadic and dyadic operations
681# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
682# will never take this exception.
683	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
684	beq.b		fovfl_extract		# monadic
685
686	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
687	bsr.l		load_fpn2		# load dst into FP_DST
688
689	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
690	bsr.l		set_tag_x		# tag the operand type
691	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
692	bne.b		fovfl_op2_done		# no
693	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
694fovfl_op2_done:
695	mov.b		%d0,DTAG(%a6)		# save dst optype tag
696
697fovfl_extract:
698
699#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
700#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
701#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
702#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
703#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
704#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
705
706	clr.l		%d0			# upper bytes of d0 must be zero
707	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
708
709	mov.b		1+EXC_CMDREG(%a6),%d1	# low byte of the extension word
710	andi.w		&0x007f,%d1		# extract extension
711
712	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
713
714	fmov.l		&0x0,%fpcr		# zero current control regs
715	fmov.l		&0x0,%fpsr
716
717	lea		FP_SRC(%a6),%a0		# a0 = ptr to src op
718	lea		FP_DST(%a6),%a1		# a1 = ptr to dst op
719
720# maybe we can make these entry points ONLY the OVFL entry points of each routine.
721	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
722	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + d1
723
724# the operation has been emulated. the result is in fp0.
725# the EXOP, if an exception occurred, is in fp1.
726# we must save the default result regardless of whether
727# traps are enabled or disabled.
728	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # d0 = 3-bit dst reg field
729	bsr.l		store_fpreg		# store result to FP regfile
730
731# the exceptional possibilities we have left ourselves with are ONLY overflow
732# and inexact. and, the inexact is such that overflow occurred and was disabled
733# but inexact was enabled.
734	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
735	bne.b		fovfl_ovfl_on		# yes
736
737	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
738	bne.b		fovfl_inex_on		# yes
739
740	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
741	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
742	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
743
744	unlk		%a6			# tear down the stack frame
745#$#	add.l		&24,%sp
746	bra.l		_fpsp_done		# exit: neither exception enabled
747
748# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
749# in fp1. now, simply jump to _real_ovfl()!
750fovfl_ovfl_on:
751	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
752
753	mov.w		&0xe005,2+FP_SRC(%a6) 	# save exc status
754
755	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
756	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
757	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
758
759	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
760
761	unlk		%a6			# tear down the stack frame
762
763	bra.l		_real_ovfl		# exit through the overflow callout
764
765# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
766# we must jump to real_inex().
767fovfl_inex_on:
768
769	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP (fp1) to stack
770
771	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
772	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
773
774	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
775	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
776	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
777
778	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
779
780	unlk		%a6			# tear down the stack frame
781
782	bra.l		_real_inex		# exit through the inexact callout
783
784########################################################################
# fmove-out (opclass 3) path: fout() performs the emulation and the store.
785fovfl_out:
786
787
788#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
789#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
790#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
791
792# the src operand is definitely a NORM(!), so tag it as such
793	mov.b		&NORM,STAG(%a6)		# set src optype tag
794
795	clr.l		%d0			# upper bytes of d0 must be zero
796	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
797
# note: this mask (0xffff00ff) differs from the one used at fovfl_extract
# (0x00ff01ff); here only the exception status byte is cleared.
798	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
799
800	fmov.l		&0x0,%fpcr		# zero current control regs
801	fmov.l		&0x0,%fpsr
802
803	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
804
805	bsr.l		fout			# emulate the opclass 3 instruction
806
807	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
808	bne.w		fovfl_ovfl_on		# yes
809
810	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
811	bne.w		fovfl_inex_on		# yes
812
813	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
814	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
815	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
816
817	unlk		%a6			# %sp now points at the exception frame
818#$#	add.l		&24,%sp
819
820	btst		&0x7,(%sp)		# is trace on?
821	beq.l		_fpsp_done		# no
822
# trace is on: rewrite the exception frame in place before calling _real_trace.
823	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
824	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
825	bra.l		_real_trace
826
827#########################################################################
828# XDEF ****************************************************************	#
829#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
830#									#
831#	This handler should be the first code executed upon taking the	#
832#	FP Underflow exception in an operating system.			#
833#									#
834# XREF ****************************************************************	#
835#	_imem_read_long() - read instruction longword			#
836#	fix_skewed_ops() - adjust src operand in fsave frame		#
837#	set_tag_x() - determine optype of src/dst operands		#
838#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
839#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
840#	load_fpn2() - load dst operand from FP regfile			#
841#	fout() - emulate an opclass 3 instruction			#
842#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
843#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
844#	_real_unfl() - "callout" for Underflow exception enabled code	#
845#	_real_inex() - "callout" for Inexact exception enabled code	#
846#	_real_trace() - "callout" for Trace exception code		#
847#									#
848# INPUT ***************************************************************	#
849#	- The system stack contains the FP Unfl exception stack frame	#
850#	- The fsave frame contains the source operand			#
851# 									#
852# OUTPUT **************************************************************	#
853#	Underflow Exception enabled:					#
854#	- The system stack is unchanged					#
855#	- The fsave frame contains the adjusted src op for opclass 0,2	#
856#	Underflow Exception disabled:					#
857#	- The system stack is unchanged					#
858#	- The "exception present" flag in the fsave frame is cleared	#
859#									#
860# ALGORITHM ***********************************************************	#
861#	On the 060, if an FP underflow is present as the result of any	#
862# instruction, the 060 will take an underflow exception whether the 	#
863# exception is enabled or disabled in the FPCR. For the disabled case, 	#
864# This handler emulates the instruction to determine what the correct	#
865# default result should be for the operation. This default result is	#
866# then stored in either the FP regfile, data regfile, or memory. 	#
867# Finally, the handler exits through the "callout" _fpsp_done() 	#
868# denoting that no exceptional conditions exist within the machine.	#
869# 	If the exception is enabled, then this handler must create the	#
870# exceptional operand and place it in the fsave state frame, and store	#
871# the default result (only if the instruction is opclass 3). For 	#
872# exceptions enabled, this handler must exit through the "callout" 	#
873# _real_unfl() so that the operating system enabled underflow handler	#
874# can handle this case.							#
875#	Two other conditions exist. First, if underflow was disabled 	#
876# but the inexact exception was enabled and the result was inexact, 	#
877# this handler must exit through the "callout" _real_inex().		#
878#									#
879#	Also, in the case of an opclass three instruction where 	#
880# underflow was disabled and the trace exception was enabled, this	#
881# handler must exit through the "callout" _real_trace().		#
882#									#
883#########################################################################
884
885	global		_fpsp_unfl
886_fpsp_unfl:
887
888#$#	sub.l		&24,%sp			# make room for src/dst
889
890	link.w		%a6,&-LOCAL_SIZE	# init stack frame
891
892	fsave		FP_SRC(%a6)		# grab the "busy" frame
893
894 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
895	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
896 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
897
898# the FPIAR holds the "current PC" of the faulting instruction
899	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
900	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
901	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
902	bsr.l		_imem_read_long		# fetch the instruction words
903	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
904
905##############################################################################
906
907	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
908	bne.w		funfl_out
909
910
911	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
912	bsr.l		fix_skewed_ops		# fix src op
913
914	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
915	bsr.l		set_tag_x		# tag the operand type
916	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
917
918# bit five of the fp ext word separates the monadic and dyadic operations
919# that can pass through fpsp_unfl(). remember that fcmp, and ftst
920# will never take this exception.
921	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
922	beq.b		funfl_extract		# monadic
923
924# now, what's left that's not dyadic is fsincos. we can distinguish it
925# from all dyadics by the '0110xxx pattern
926	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
927	bne.b		funfl_extract		# yes
928
929	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
930	bsr.l		load_fpn2		# load dst into FP_DST
931
932	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
933	bsr.l		set_tag_x		# tag the operand type
934	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
935	bne.b		funfl_op2_done		# no
936	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
937funfl_op2_done:
938	mov.b		%d0,DTAG(%a6)		# save dst optype tag
939
940funfl_extract:
941
942#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
943#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
944#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
945#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
946#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
947#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
948
949	clr.l		%d0
950	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
951
952	mov.b		1+EXC_CMDREG(%a6),%d1
953	andi.w		&0x007f,%d1		# extract extension
954
955	andi.l		&0x00ff01ff,USER_FPSR(%a6) # keep only bits 23-16 and 8-0
956
957	fmov.l		&0x0,%fpcr		# zero current control regs
958	fmov.l		&0x0,%fpsr
959
960	lea		FP_SRC(%a6),%a0
961	lea		FP_DST(%a6),%a1
962
963# maybe we can make these entry points ONLY the UNFL entry points of each routine.
964	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
965	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
966
967	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
968	bsr.l		store_fpreg
969
970# The `060 FPU multiplier hardware is such that if the result of a
971# multiply operation is the smallest possible normalized number
972# (0x00000000_80000000_00000000), then the machine will take an
973# underflow exception. Since this is incorrect, we need to check
974# if our emulation, after re-doing the operation, decided that
975# no underflow was called for. We do these checks only in
976# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
977# special case will simply exit gracefully with the correct result.
978
979# the exceptional possibilities we have left ourselves with are ONLY underflow
980# and inexact. and, the inexact is such that underflow occurred and was disabled
981# but inexact was enabled.
982	btst		&unfl_bit,FPCR_ENABLE(%a6)
983	bne.b		funfl_unfl_on
984
985funfl_chkinex:
986	btst		&inex2_bit,FPCR_ENABLE(%a6)
987	bne.b		funfl_inex_on
988
989funfl_exit:
990	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
991	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
992	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
993
994	unlk		%a6
995#$#	add.l		&24,%sp
996	bra.l		_fpsp_done
997
998# underflow is enabled AND the hardware reported an underflow. so, unless the
999# check below finds it bogus, we have the EXOP in fp1. what to do now?
1000# well, we simply have to get to go to _real_unfl()!
1001funfl_unfl_on:
1002
1003# The `060 FPU multiplier hardware is such that if the result of a
1004# multiply operation is the smallest possible normalized number
1005# (0x00000000_80000000_00000000), then the machine will take an
1006# underflow exception. Since this is incorrect, we check here to see
1007# if our emulation, after re-doing the operation, decided that
1008# no underflow was called for.
1009	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1010	beq.w		funfl_chkinex
1011
1012funfl_unfl_on2:
1013	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1014
1015	mov.w		&0xe003,2+FP_SRC(%a6) 	# save exc status
1016
1017	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1018	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1020
1021	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1022
1023	unlk		%a6
1024
1025	bra.l		_real_unfl
1026
1027# underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028# we must jump to real_inex().
1029funfl_inex_on:
1030
1031# The `060 FPU multiplier hardware is such that if the result of a
1032# multiply operation is the smallest possible normalized number
1033# (0x00000000_80000000_00000000), then the machine will take an
1034# underflow exception.
1035# But, whether bogus or not, if inexact is enabled AND it occurred,
1036# then we have to branch to real_inex.
1037
1038	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1039	beq.w		funfl_exit
1040
1041funfl_inex_on2:
1042
1043	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP (fp1) to stack
1044
1045	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1046	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
1047
1048	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1049	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1051
1052	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1053
1054	unlk		%a6
1055
1056	bra.l		_real_inex
1057
1058#######################################################################
1059funfl_out:
1060
1061
1062#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1065
1066# the src operand is definitely a NORM(!), so tag it as such
1067	mov.b		&NORM,STAG(%a6)		# set src optype tag
1068
1069	clr.l		%d0
1070	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1071
1072	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1073
1074	fmov.l		&0x0,%fpcr		# zero current control regs
1075	fmov.l		&0x0,%fpsr
1076
1077	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1078
1079	bsr.l		fout			# call fmove out routine
1080
1081	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1082	bne.w		funfl_unfl_on2		# yes
1083
1084	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1085	bne.w		funfl_inex_on2		# yes
1086
1087	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1088	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1090
1091	unlk		%a6
1092#$#	add.l		&24,%sp
1093
1094	btst		&0x7,(%sp)		# is trace on?
1095	beq.l		_fpsp_done		# no
1096
1097	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1098	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1099	bra.l		_real_trace
1100
1101#########################################################################
1102# XDEF ****************************************************************	#
1103#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1104#		        Data Type" exception.				#
1105#									#
1106#	This handler should be the first code executed upon taking the	#
1107#	FP Unimplemented Data Type exception in an operating system.	#
1108#									#
1109# XREF ****************************************************************	#
1110#	_imem_read_{word,long}() - read instruction word/longword	#
1111#	fix_skewed_ops() - adjust src operand in fsave frame		#
1112#	set_tag_x() - determine optype of src/dst operands		#
1113#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1114#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1115#	load_fpn2() - load dst operand from FP regfile			#
1116#	load_fpn1() - load src operand from FP regfile			#
1117#	fout() - emulate an opclass 3 instruction			#
1118#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
1119#	_real_inex() - "callout" to operating system inexact handler	#
1120#	_fpsp_done() - "callout" for exit; work all done		#
1121#	_real_trace() - "callout" for Trace enabled exception		#
1122#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1123#	_real_snan() - "callout" for SNAN exception			#
1124#	_real_operr() - "callout" for OPERR exception			#
1125#	_real_ovfl() - "callout" for OVFL exception			#
1126#	_real_unfl() - "callout" for UNFL exception			#
1127#	get_packed() - fetch packed operand from memory			#
1128#									#
1129# INPUT ***************************************************************	#
1130#	- The system stack contains the "Unimp Data Type" stk frame	#
1131#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1132# 									#
1133# OUTPUT **************************************************************	#
1134#	If Inexact exception (opclass 3):				#
1135#	- The system stack is changed to an Inexact exception stk frame	#
1136#	If SNAN exception (opclass 3):					#
1137#	- The system stack is changed to an SNAN exception stk frame	#
1138#	If OPERR exception (opclass 3):					#
1139#	- The system stack is changed to an OPERR exception stk frame	#
1140#	If OVFL exception (opclass 3):					#
1141#	- The system stack is changed to an OVFL exception stk frame	#
1142#	If UNFL exception (opclass 3):					#
1143#	- The system stack is changed to an UNFL exception stack frame	#
1144#	If Trace exception enabled:					#
1145#	- The system stack is changed to a Trace exception stack frame	#
1146#	Else: (normal case)						#
1147#	- Correct result has been stored as appropriate			#
1148#									#
1149# ALGORITHM ***********************************************************	#
1150#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1151# unimplemented data types. These can be either opclass 0,2 or 3 	#
1152# instructions, and (2) PACKED unimplemented data format instructions	#
1153# also of opclasses 0,2, or 3.						#
1154#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1155# operand from the fsave state frame and the dst operand (if dyadic)	#
1156# from the FP register file. The instruction is then emulated by 	#
1157# choosing an emulation routine from a table of routines indexed by	#
1158# instruction type. Once the instruction has been emulated and result	#
1159# saved, then we check to see if any enabled exceptions resulted from	#
1160# instruction emulation. If none, then we exit through the "callout"	#
1161# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1162# this exception into the FPU in the fsave state frame and then exit	#
1163# through _fpsp_done().							#
1164#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1165# emulated and exceptions handled. The differences occur in how the	#
1166# handler loads the packed op (by calling get_packed() routine) and	#
1167# by the fact that a Trace exception could be pending for PACKED ops.	#
1168# If a Trace exception is pending, then the current exception stack	#
1169# frame is changed to a Trace exception stack frame and an exit is	#
1170# made through _real_trace().						#
1171#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1172# performed by calling the routine fout(). If no exception should occur	#
1173# as the result of emulation, then an exit either occurs through	#
1174# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1175# (a Trace stack frame must be created here, too). If an FP exception	#
1176# should occur, then we must create an exception stack frame of that	#
1177# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1178# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 	#
1179# emulation is performed in a similar manner.				#
1180#									#
1181#########################################################################
1182
1183#
1184# (1) DENORM and UNNORM (unimplemented) data types:
1185#
1186#				post-instruction
1187#				*****************
1188#				*      EA	*
1189#	 pre-instruction	*		*
1190# 	*****************	*****************
1191#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1192#	*****************	*****************
1193#	*     Next	*	*     Next	*
1194#	*      PC	*	*      PC	*
1195#	*****************	*****************
1196#	*      SR	*	*      SR	*
1197#	*****************	*****************
1198#
1199# (2) PACKED format (unsupported) opclasses two and three:
1200#	*****************
1201#	*      EA	*
1202#	*		*
1203#	*****************
1204#	* 0x2 *  0x0dc	*
1205#	*****************
1206#	*     Next	*
1207#	*      PC	*
1208#	*****************
1209#	*      SR	*
1210#	*****************
1211#
1212	global		_fpsp_unsupp
1213_fpsp_unsupp:
1214
1215	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1216
1217	fsave		FP_SRC(%a6)		# save fp state
1218
1219 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1220	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1222
1223	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1224	bne.b		fu_s
1225fu_u:
1226	mov.l		%usp,%a0		# fetch user stack pointer
1227	mov.l		%a0,EXC_A7(%a6)		# save on stack
1228	bra.b		fu_cont
1229# if the exception is an opclass zero or two unimplemented data type
1230# exception, then the a7' calculated here is wrong since it doesn't
1231# stack an ea. however, we don't need an a7' for this case anyways.
1232fu_s:
1233	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1234	mov.l		%a0,EXC_A7(%a6)		# save on stack
1235
1236fu_cont:
1237
1238# the FPIAR holds the "current PC" of the faulting instruction
1239# the FPIAR should be set correctly for ALL exceptions passing through
1240# this point.
1241	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1243	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1244	bsr.l		_imem_read_long		# fetch the instruction words
1245	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1246
1247############################
1248
1249	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1250
1251# Separate opclass three (fpn-to-mem) ops since they have a different
1252# stack frame and protocol.
1253	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1254	bne.w		fu_out			# yes
1255
1256# Separate packed opclass two instructions.
1257	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1258	cmpi.b		%d0,&0x13		# is it a packed opclass two?
1259	beq.w		fu_in_pack		# yes
1260
1261
1262# I'm not sure at this point what FPSR bits are valid for this instruction.
1263# so, since the emulation routines re-create them anyways, zero exception field
1264	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero ccodes, exception field
1265
1266	fmov.l		&0x0,%fpcr		# zero current control regs
1267	fmov.l		&0x0,%fpsr
1268
1269# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270# precision format if the src format was single or double and the
1271# source data type was an INF, NAN, DENORM, or UNNORM
1272	lea		FP_SRC(%a6),%a0		# pass ptr to input
1273	bsr.l		fix_skewed_ops
1274
1275# we don't know whether the src operand or the dst operand (or both) is the
1276# UNNORM or DENORM. call the function that tags the operand type. if the
1277# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1279	bsr.l		set_tag_x		# tag the operand type
1280	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1281	bne.b		fu_op2			# no
1282	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1283
1284fu_op2:
1285	mov.b		%d0,STAG(%a6)		# save src optype tag
1286
1287	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1288
1289# bit five of the fp extension word separates the monadic and dyadic operations
1290# at this point
1291	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1292	beq.b		fu_extract		# monadic
1293	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1294	beq.b		fu_extract		# yes, so it's monadic, too
1295
1296	bsr.l		load_fpn2		# load dst into FP_DST
1297
1298	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1299	bsr.l		set_tag_x		# tag the operand type
1300	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1301	bne.b		fu_op2_done		# no
1302	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1303fu_op2_done:
1304	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1305
1306fu_extract:
1307	clr.l		%d0
1308	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1309
1310	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1311
1312	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1313	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
1314
1315	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1317
1318#
1319# Exceptions in order of precedence:
1320# 	BSUN	: none
1321#	SNAN	: all dyadic ops
1322#	OPERR	: fsqrt(-NORM)
1323#	OVFL	: all except ftst,fcmp
1324#	UNFL	: all except ftst,fcmp
1325#	DZ	: fdiv
1326# 	INEX2	: all except ftst,fcmp
1327#	INEX1	: none (packed doesn't go through here)
1328#
1329
1330# we determine the highest priority exception(if any) set by the
1331# emulation routine that has also been enabled by the user.
1332	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1333	bne.b		fu_in_ena		# some are enabled
1334
1335fu_in_cont:
1336# fcmp and ftst do not store any result.
1337	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1338	andi.b		&0x38,%d0		# extract bits 3-5
1339	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1340	beq.b		fu_in_exit		# yes
1341
1342	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
1343	bsr.l		store_fpreg		# store the result
1344
1345fu_in_exit:
1346
1347	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1348	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1350
1351	unlk		%a6
1352
1353	bra.l		_fpsp_done
1354
1355fu_in_ena:
1356	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1357	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1358	bne.b		fu_in_exc		# there is at least one set
1359
1360#
1361# No exceptions occurred that were also enabled. Now:
1362#
1363#   	if (OVFL && ovfl_disabled && inexact_enabled) {
1364#	    insert an OVFL exception into the FPU (even if the result was exact!);
1365#     	} else {
1366#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1367#	    return;
1368#     	}
1369#
1370	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371	beq.b		fu_in_cont		# no
1372
1373fu_in_ovflchk:
1374	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375	beq.b		fu_in_cont		# no
1376	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1377
1378#
1379# An exception occurred and that exception was enabled:
1380#
1381#	shift enabled exception field into lo byte of d0;
1382#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384#		/*
1385#		 * inexact is the highest priority enabled exception, but a
1386#		 * disabled OVFL or UNFL also occurred.
1387#		 */
1388#		restore exc state for OVFL or UNFL (not INEX) into the FPU;
1389#	} else {
1390#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1391#	}
1392#
1393fu_in_exc:
1394	subi.l		&24,%d0			# fix offset to be 0-7
1395	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1396	bne.b		fu_in_exc_exit		# no
1397
1398# the enabled exception was inexact
1399	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400	bne.w		fu_in_exc_unfl		# yes
1401	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402	bne.w		fu_in_exc_ovfl		# yes
1403
1404# here, we insert the correct fsave status value into the fsave frame for the
1405# corresponding exception. the operand in the fsave frame should be the original
1406# src operand.
1407fu_in_exc_exit:
1408	mov.l		%d0,-(%sp)		# save d0
1409	bsr.l		funimp_skew		# skew sgl or dbl inputs
1410	mov.l		(%sp)+,%d0		# restore d0
1411
1412	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1413
1414	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1415	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1417
1418	frestore	FP_SRC(%a6)		# restore src op
1419
1420	unlk		%a6
1421
1422	bra.l		_fpsp_done
1423
1424tbl_except:
1425	short		0xe000,0xe006,0xe004,0xe005	# BSUN,SNAN,OPERR,OVFL
1426	short		0xe003,0xe002,0xe001,0xe001	# UNFL,DZ,INEX2,INEX1
1427
1428fu_in_exc_unfl:
1429	mov.w		&0x4,%d0		# select UNFL entry of tbl_except
1430	bra.b		fu_in_exc_exit
1431fu_in_exc_ovfl:
1432	mov.w		&0x03,%d0		# select OVFL entry of tbl_except
1433	bra.b		fu_in_exc_exit
1434
1435# fix_skewed_ops(): a0 = ptr to the operand to fix; d0 is used as scratch.
1436# If the instruction is opclass two with a single or double precision
1437# source and the operand is a skewed zero, denorm, inf, or nan, it is
1438# "corrected" in place to the proper equivalent extended precision number.
1439	global		fix_skewed_ops
1440fix_skewed_ops:
1441	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1442	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1443	beq.b		fso_sgl			# yes
1444	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1445	beq.b		fso_dbl			# yes
1446	rts					# no; nothing to fix
1447
1448fso_sgl:
1449	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1450	andi.w		&0x7fff,%d0		# strip sign
1451	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1452	beq.b		fso_sgl_dnrm_zero	# yes
1453	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1454	beq.b		fso_infnan		# yes
1455	rts					# no; operand is left as is
1456
1457fso_sgl_dnrm_zero:
1458	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1459	beq.b		fso_zero		# it's a skewed zero
1460fso_sgl_dnrm:
1461# here, we count on norm not to alter a0...
1462	bsr.l		norm			# normalize mantissa
1463	neg.w		%d0			# -shft amt
1464	addi.w		&0x3f81,%d0		# adjust new exponent
1465	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
1466	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1467	rts
1468
1469fso_zero:
1470	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1471	rts
1472
1473fso_infnan:
1474	andi.b		&0x7f,LOCAL_HI(%a0) 	# clear j-bit
1475	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1476	rts
1477
1478fso_dbl:
1479	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1480	andi.w		&0x7fff,%d0		# strip sign
1481	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1482	beq.b		fso_dbl_dnrm_zero	# yes
1483	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1484	beq.b		fso_infnan		# yes
1485	rts					# no; operand is left as is
1486
1487fso_dbl_dnrm_zero:
1488	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1489	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1490	tst.l		LOCAL_LO(%a0)		# is it a zero?
1491	beq.b		fso_zero		# yes
1492fso_dbl_dnrm:
1493# here, we count on norm not to alter a0...
1494	bsr.l		norm			# normalize mantissa
1495	neg.w		%d0			# -shft amt
1496	addi.w		&0x3c01,%d0		# adjust new exponent
1497	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
1498	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1499	rts
1500
1501#################################################################
1502
1503# fmove out took an unimplemented data type exception.
1504# the src operand is in FP_SRC. Call fout() to write out the result and
1505# to determine which exceptions, if any, to take.
1506fu_out:
1507
1508# Separate packed move outs from the UNNORM and DENORM move outs.
1509	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1510	cmpi.b		%d0,&0x3
1511	beq.w		fu_out_pack
1512	cmpi.b		%d0,&0x7
1513	beq.w		fu_out_pack
1514
1515
1516# I'm not sure at this point what FPSR bits are valid for this instruction.
1517# so, since the emulation routines re-create them anyways, zero exception field.
1518# fmove out doesn't affect ccodes.
1519	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1520
1521	fmov.l		&0x0,%fpcr		# zero current control regs
1522	fmov.l		&0x0,%fpsr
1523
1524# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525# call here. just figure out what it is...
1526	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1527	andi.w		&0x7fff,%d0		# strip sign
1528	beq.b		fu_out_denorm		# it's a DENORM
1529
1530	lea		FP_SRC(%a6),%a0
1531	bsr.l		unnorm_fix		# it's an UNNORM; fix it
1532
1533	mov.b		%d0,STAG(%a6)		# save src optype tag
1534
1535	bra.b		fu_out_cont
1536fu_out_denorm:
1537	mov.b		&DENORM,STAG(%a6)	# set src optype tag
1538fu_out_cont:
1539
1540	clr.l		%d0
1541	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1542
1543	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1544
1545	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1546	bsr.l		fout			# call fmove out routine
1547
1548# Exceptions in order of precedence:
1549# 	BSUN	: none
1550#	SNAN	: none
1551#	OPERR	: fmove.{b,w,l} out of large UNNORM
1552#	OVFL	: fmove.{s,d}
1553#	UNFL	: fmove.{s,d,x}
1554#	DZ	: none
1555# 	INEX2	: all
1556#	INEX1	: none (packed doesn't travel through here)
1557
1558# determine the highest priority exception(if any) set by the
1559# emulation routine that has also been enabled by the user.
1560	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1561	bne.w		fu_out_ena		# some are enabled
1562
1563fu_out_done:
1564
1565	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1566
1567# on extended precision opclass three instructions using pre-decrement or
1568# post-increment addressing mode, the address register is not updated. if the
1569# address register was the stack pointer used from user mode, then let's update
1570# it here. if it was used from supervisor mode, then we have to handle this
1571# as a special case.
1572	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1573	bne.b		fu_out_done_s		# supervisor
1574
1575	mov.l		EXC_A7(%a6),%a0		# restore a7
1576	mov.l		%a0,%usp
1577
1578fu_out_done_cont:
1579	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1580	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1582
1583	unlk		%a6
1584
1585	btst		&0x7,(%sp)		# is trace on?
1586	bne.b		fu_out_trace		# yes
1587
1588	bra.l		_fpsp_done
1589
1590# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591# ("fmov.x fpm,-(a7)") if so, shift the exc frame down and store the result.
1592fu_out_done_s:
1593	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1594	bne.b		fu_out_done_cont	# no
1595
1596# the extended precision result is still in fp0. but, we need to save it
1597# somewhere on the stack until we can copy it to its final resting place.
1598# here, we're counting on the top of the stack to be the old place-holders
1599# for fp0/fp1 which have already been restored. that way, we can write
1600# over those destinations with the shifted stack frame.
1601	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1602
1603	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1604	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1606
1607	mov.l		(%a6),%a6		# restore frame pointer
1608
1609	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1611
1612# now, copy the result to the proper place on the stack
1613	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1616
1617	add.l		&LOCAL_SIZE-0x8,%sp	# sp now points to shifted frame
1618
1619	btst		&0x7,(%sp)		# is trace on?
1620	bne.b		fu_out_trace		# yes
1621
1622	bra.l		_fpsp_done
1623
1624fu_out_ena:
1625	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1626	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1627	bne.b		fu_out_exc		# there is at least one set
1628
1629# no enabled exceptions were set.
1630# if a disabled overflow occurred and inexact was enabled but the result
1631# was exact, then a branch to _real_inex() is made.
1632	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633	beq.w		fu_out_done		# no
1634
1635fu_out_ovflchk:
1636	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637	beq.w		fu_out_done		# no
1638	bra.w		fu_inex			# yes
1639
1640#
1641# The fp move out that took the "Unimplemented Data Type" exception was
1642# being traced. Since the stack frames are similar, get the "current" PC
1643# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1644#
1645#		  UNSUPP FRAME		   TRACE FRAME
1646# 		*****************	*****************
1647#		*      EA	*	*    Current	*
1648#		*		*	*      PC	*
1649#		*****************	*****************
1650#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1651#		*****************	*****************
1652#		*     Next	*	*     Next	*
1653#		*      PC	*	*      PC	*
1654#		*****************	*****************
1655#		*      SR	*	*      SR	*
1656#		*****************	*****************
1657#
1658fu_out_trace:
1659	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1660	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1661	bra.l		_real_trace
1662
1663# an exception occurred and that exception was enabled.
1664fu_out_exc:
1665	subi.l		&24,%d0			# fix offset to be 0-7
1666
1667# we don't mess with the existing fsave frame. just re-insert it and
1668# jump to the "_real_{}()" handler...
1669	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset
1670	jmp		(tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
1671
1672	swbeg		&0x8
1673tbl_fu_out:
1674	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1675	short		tbl_fu_out 	- tbl_fu_out	# SNAN can't happen
1676	short		fu_operr	- tbl_fu_out	# OPERR
1677	short		fu_ovfl 	- tbl_fu_out	# OVFL
1678	short		fu_unfl 	- tbl_fu_out	# UNFL
1679	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1680	short		fu_inex 	- tbl_fu_out	# INEX2
1681	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1682
1683# for snan and operr, the src op is still in FP_SRC so just frestore it.
1684# for ovfl, unfl, and inex, the EXOP (fp1) is written over FP_SRC first.
1685fu_snan:
1686	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1687	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1689
1690	mov.w		&0x30d8,EXC_VOFF(%a6)	# stk fmt = 0x3; voff = 0x0d8
1691	mov.w		&0xe006,2+FP_SRC(%a6)	# set SNAN exc status
1692
1693	frestore	FP_SRC(%a6)		# restore src op
1694
1695	unlk		%a6
1696
1697
1698	bra.l		_real_snan
1699
1700fu_operr:
1701	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1702	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1704
1705	mov.w		&0x30d0,EXC_VOFF(%a6)	# stk fmt = 0x3; voff = 0x0d0
1706	mov.w		&0xe004,2+FP_SRC(%a6)	# set OPERR exc status
1707
1708	frestore	FP_SRC(%a6)		# restore src op
1709
1710	unlk		%a6
1711
1712
1713	bra.l		_real_operr
1714
1715fu_ovfl:
1716	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to the stack
1717
1718	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1719	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1721
1722	mov.w		&0x30d4,EXC_VOFF(%a6)	# stk fmt = 0x3; voff = 0x0d4
1723	mov.w		&0xe005,2+FP_SRC(%a6)	# set OVFL exc status
1724
1725	frestore	FP_SRC(%a6)		# restore EXOP
1726
1727	unlk		%a6
1728
1729	bra.l		_real_ovfl
1730
1731# underflow can happen for extended precision. extended precision opclass
1732# three instruction exceptions don't update the stack pointer. so, if the
1733# exception occurred from user mode, then simply update a7 and exit normally.
1734# if the exception occurred from supervisor mode, check if the <ea> was -(sp).
1735fu_unfl:
1736	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1737
1738	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1739	bne.w		fu_unfl_s		# supervisor
1740
1741	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1742	mov.l		%a0,%usp		# to or not...
1743
1744fu_unfl_cont:
1745	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to the stack
1746
1747	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1748	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1750
1751	mov.w		&0x30cc,EXC_VOFF(%a6)	# stk fmt = 0x3; voff = 0x0cc
1752	mov.w		&0xe003,2+FP_SRC(%a6)	# set UNFL exc status
1753
1754	frestore	FP_SRC(%a6)		# restore EXOP
1755
1756	unlk		%a6
1757
1758	bra.l		_real_unfl
1759
1760fu_unfl_s:
1761	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762	bne.b		fu_unfl_cont		# no
1763
1764# the extended precision result is still in fp0. but, we need to save it
1765# somewhere on the stack until we can copy it to its final resting place
1766# (where the exc frame is currently). make sure it's not at the top of the
1767# frame or it will get overwritten when the exc stack frame is shifted "down".
1768	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1769	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1770
1771	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1772	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1774
1775	mov.w		&0x30cc,EXC_VOFF(%a6)	# stk fmt = 0x3; voff = 0x0cc
1776	mov.w		&0xe003,2+FP_DST(%a6)	# set UNFL exc status
1777
1778	frestore	FP_DST(%a6)		# restore EXOP
1779
1780	mov.l		(%a6),%a6		# restore frame pointer
1781
1782	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1785
1786# now, copy the result to the proper place on the stack
1787	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1790
1791	add.l		&LOCAL_SIZE-0x8,%sp	# sp now points to shifted frame
1792
1793	bra.l		_real_unfl
1794
# fmove in and out enter here.
#
# fu_inex: exit path for an enabled inexact exception. The EXOP in fp1 is
# handed back to the FPU through an fsave frame at FP_SRC with status word
# 0xe001, the format/vector word of the exception frame is rewritten to
# 0x30c4, the local frame is released and control passes to _real_inex.
fu_inex:
# the EXOP must be written out before fp0/fp1 are reloaded just below.
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# fsave status word = inexact

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex
1812
1813#########################################################################
1814#########################################################################
# fu_in_pack: emulate an opclass two instruction whose source operand is in
# packed format. Fetches and tags the source, loads and tags the destination
# for dyadic operations, dispatches to the emulation routine through
# tbl_unsupp, then falls into the exception/exit handling that follows.
fu_in_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
# (the mask 0x00ff00ff clears the top byte of USER_FPSR as well as the
# exception byte.)
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

# d0 = dst register number; only consumed (by load_fpn2) on the dyadic path.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# set up the arguments for the emulation routine:
#	d0 = rnd mode/prec byte (zero-extended), a0 = ptr to src, a1 = ptr to dst
fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# tbl_unsupp holds one longword per extension value; each entry is used as
# an offset relative to tbl_unsupp itself.
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
# 	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
# 	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled
# none enabled: fall through into fu_in_cont_p.
1879
# fu_in_cont_p: normal (no enabled exception) completion of a packed-source
# opclass two instruction. Stores the result unless the operation is fcmp or
# ftst, then exits through _fpsp_done or, if tracing, through fu_trace_p.
fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# after unlk, (%sp) is the first byte of the exception frame; bit 7 of that
# byte is tested as the trace flag.
	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
1909
# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
# the first 8 bytes of the frame are copied 0xc bytes higher, upper longword
# first, and sp is then advanced by 0xc to point at the copy.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
1932
# fu_in_ena_p: at least one exception is enabled.
# On entry d0.b = FPCR_ENABLE. bfffo scans the low byte of d0 (bit-field
# offsets 24-31) for the first set bit; Z is clear when one is found.
fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled. Now:
#
#   	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#     	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#     	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1955
1956#
1957# An exception occurred and that exception was enabled:
1958#
1959#	shift enabled exception field into lo byte of d0;
1960#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962#		/*
1963#		 * this is the case where we must call _real_inex() now or else
1964#		 * there will be no other way to pass it the exceptional operand
1965#		 */
1966#		call _real_inex();
1967#	} else {
1968#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1969#	}
1970#
1971fu_in_exc_p:
1972	subi.l		&24,%d0			# fix offset to be 0-8
1973	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1974	blt.b		fu_in_exc_exit_p	# no
1975
1976# the enabled exception was inexact
1977	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978	bne.w		fu_in_exc_unfl_p	# yes
1979	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980	bne.w		fu_in_exc_ovfl_p	# yes
1981
# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
#
# On entry d0.w = index (0-7) into tbl_except_p.
fu_in_exc_exit_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exc_exit_s_p	# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exc_exit_cont_p:
# look up the fsave status word before d0 is reloaded by the movm.l below.
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done
2010
# tbl_except_p: fsave status words, indexed by exception priority
# (bfffo offset - 24). Following the precedence order used in this file:
#	0 BSUN, 1 SNAN, 2 OPERR, 3 OVFL, 4 UNFL, 5 DZ, 6 INEX2, 7 INEX1
tbl_except_p:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001
2014
# force the overflow entry of tbl_except_p (index 3 -> 0xe005) and exit.
fu_in_exc_ovfl_p:
	mov.w		&0x3,%d0
	bra.w		fu_in_exc_exit_p

# force the underflow entry of tbl_except_p (index 4 -> 0xe003) and exit.
fu_in_exc_unfl_p:
	mov.w		&0x4,%d0
	bra.w		fu_in_exc_exit_p
2022
# fu_in_exc_exit_s_p: enabled-exception exit for supervisor mode.
# If the <ea> mode was not (a7)+ this is identical to the user-mode exit
# (fu_in_exc_exit_cont_p). Otherwise the exception frame is also shifted
# "up" by 0xc bytes after the fsave frame has been restored.
# On entry d0.w = index (0-7) into tbl_except_p.
fu_in_exc_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6)
	beq.b		fu_in_exc_exit_cont_p

# look up the fsave status word before d0 is reloaded by the movm.l below.
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6			# unravel stack frame

# shift stack frame "up". who cares about <ea> field.
# the first 8 bytes of the frame are copied 0xc bytes higher, upper longword
# first, and sp is then advanced by 0xc to point at the copy.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
2046
2047#
2048# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049# exception was being traced. Make the "current" PC the FPIAR and put it in the
2050# trace stack frame then jump to _real_trace().
2051#
2052#		  UNSUPP FRAME		   TRACE FRAME
2053#		*****************	*****************
2054#		*      EA	*	*    Current	*
2055#		*		*	*      PC	*
2056#		*****************	*****************
2057#		* 0x2 *	0x0dc	* 	* 0x2 *  0x024	*
2058#		*****************	*****************
2059#		*     Next	*	*     Next	*
2060#		*      PC	*      	*      PC	*
2061#		*****************	*****************
2062#		*      SR	*	*      SR	*
2063#		*****************	*****************
2064fu_trace_p:
2065	mov.w		&0x2024,0x6(%sp)
2066	fmov.l		%fpiar,0x8(%sp)
2067
2068	bra.l		_real_trace
2069
2070#########################################################
2071#########################################################
# fu_out_pack: emulate a packed-format fmove out. Loads and tags the source
# FP register, calls fout to perform the move, then falls into the
# exception/exit handling that follows.
fu_out_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# d0 = FP register number taken from the command word, passed to load_fpn1
# (which presumably places that register's value in FP_SRC -- it is tagged
# from there just below).
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		load_fpn1

# unlike other opclass 3, unimplemented data type exceptions, packed must be
# able to detect all operand types.
	lea		FP_SRC(%a6),%a0
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

# the saved a6 is copied into EXC_A6 before the call and copied back on
# every exit path after it.
	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
# 	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
# 	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled
# none enabled: fall through into fu_out_exit_p.
2119
# fu_out_exit_p: normal (no enabled exception) exit from packed fmove out.
# Exits through _fpsp_done or, if tracing, through fu_trace_p.
fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# after unlk, (%sp) is the first byte of the exception frame; bit 7 of that
# byte is tested as the trace flag.
	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
2140
# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# a6 is reloaded by hand (no unlk) so that sp still points at the bottom of
# the local area; from here on the frame is addressed as LOCAL_SIZE+x(%sp).
	mov.l		(%a6),%a6		# restore frame pointer

# shift the first two longwords of the exception frame "down" by 0xc bytes.
# unlike the enabled-exception paths, the EXC_EA longword is not copied here.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
# (the 12 bytes that the exception frame occupied before the shift; the
# result is taken from FP_DST, where _mem_write2 stores supervisor writes)
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

# release the local area; presumably leaves sp at the shifted frame.
	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os
2168
# fu_out_ena_p: at least one exception is enabled.
# On entry d0.b = FPCR_ENABLE. bfffo scans the low byte of d0 (bit-field
# offsets 24-31) for the first set bit; Z is set when none is found.
fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p		# none set & enabled; normal exit

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 still holds the raw bfffo offset: 0x1a (26) selects OPERR, anything
# higher is treated as INEX, anything lower falls through to SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a
	bgt.w		fu_inex_p2
	beq.w		fu_operr_p
2182
# fu_snan_p: enabled SNAN on packed fmove out.
# User mode: update usp and use the common fu_snan exit.
# Supervisor mode: use fu_snan unless the <ea> mode was -(a7), in which case
# the exception frame is shifted here and the exit is made to _real_snan.
fu_snan_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_snan_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_snan			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

# a6 is reloaded by hand (no unlk) so that sp still points at the bottom of
# the local area; from here on the frame is addressed as LOCAL_SIZE+x(%sp).
	mov.l		(%a6),%a6		# restore frame pointer

# shift the exception frame "down" by 0xc bytes, one longword at a time.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
# NOTE(review): the destination is written with literal offsets 0x4/0x8/0xc
# here; presumably these equal EXC_SR+0x0/0x4/0x8 as used by the other
# frame-shift paths -- confirm against the EXC_SR definition.
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

# release the local area; presumably leaves sp at the shifted frame.
	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan
2222
# fu_operr_p: enabled OPERR on packed fmove out.
# User mode: update usp and use the common fu_operr exit.
# Supervisor mode: use fu_operr unless the <ea> mode was -(a7), in which case
# the exception frame is shifted here and the exit is made to _real_operr.
fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_operr		# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

# a6 is reloaded by hand (no unlk) so that sp still points at the bottom of
# the local area; from here on the frame is addressed as LOCAL_SIZE+x(%sp).
	mov.l		(%a6),%a6		# restore frame pointer

# shift the exception frame "down" by 0xc bytes, one longword at a time.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
# NOTE(review): the destination is written with literal offsets 0x4/0x8/0xc
# here; presumably these equal EXC_SR+0x0/0x4/0x8 as used by the other
# frame-shift paths -- confirm against the EXC_SR definition.
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

# release the local area; presumably leaves sp at the shifted frame.
	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_operr
2262
# fu_inex_p2: enabled INEX on packed fmove out.
# User mode: update usp and use the common fu_inex exit.
# Supervisor mode: use fu_inex unless the <ea> mode was -(a7), in which case
# the exception frame is shifted here and the exit is made to _real_inex.
fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_inex_s_p2		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_inex			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6) 	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6) 	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

# a6 is reloaded by hand (no unlk) so that sp still points at the bottom of
# the local area; from here on the frame is addressed as LOCAL_SIZE+x(%sp).
	mov.l		(%a6),%a6		# restore frame pointer

# shift the exception frame "down" by 0xc bytes, one longword at a time.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
# NOTE(review): the destination is written with literal offsets 0x4/0x8/0xc
# here; presumably these equal EXC_SR+0x0/0x4/0x8 as used by the other
# frame-shift paths -- confirm against the EXC_SR definition.
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

# release the local area; presumably leaves sp at the shifted frame.
	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex
2302
2303#########################################################################
2304
2305#
2306# if we're stuffing a source operand back into an fsave frame then we
2307# have to make sure that for single or double source operands that the
2308# format stuffed is as weird as the hardware usually makes it.
2309#
2310	global		funimp_skew
2311funimp_skew:
2312	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313	cmpi.b		%d0,&0x1		# was src sgl?
2314	beq.b		funimp_skew_sgl		# yes
2315	cmpi.b		%d0,&0x5		# was src dbl?
2316	beq.b		funimp_skew_dbl		# yes
2317	rts
2318
2319funimp_skew_sgl:
2320	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2321	andi.w		&0x7fff,%d0		# strip sign
2322	beq.b		funimp_skew_sgl_not
2323	cmpi.w		%d0,&0x3f80
2324	bgt.b		funimp_skew_sgl_not
2325	neg.w		%d0			# make exponent negative
2326	addi.w		&0x3f81,%d0		# find amt to shift
2327	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2328	lsr.l		%d0,%d1			# shift it
2329	bset		&31,%d1			# set j-bit
2330	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2331	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2332	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2333funimp_skew_sgl_not:
2334	rts
2335
2336funimp_skew_dbl:
2337	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2338	andi.w		&0x7fff,%d0		# strip sign
2339	beq.b		funimp_skew_dbl_not
2340	cmpi.w		%d0,&0x3c00
2341	bgt.b		funimp_skew_dbl_not
2342
2343	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2344	smi.b		0x2+FP_SRC(%a6)
2345	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2346	clr.l		%d0			# clear g,r,s
2347	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2348	mov.w		&0x3c01,%d1		# pass denorm threshold
2349	bsr.l		dnrm_lp			# denorm it
2350	mov.w		&0x3c00,%d0		# new exponent
2351	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2352	beq.b		fss_dbl_denorm_done	# no
2353	bset		&15,%d0			# set sign
2354fss_dbl_denorm_done:
2355	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2356	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2357funimp_skew_dbl_not:
2358	rts
2359
2360#########################################################################
# _mem_write2: 12-byte store helper for the handlers above.
#	In:	a0 = ptr to the 12 bytes to store; a6 = frame pointer.
#	User mode (S bit of EXC_SR clear): tail-branches to _dmem_write with
#	the caller's registers untouched.
#	Supervisor mode: the 12 bytes are copied into FP_DST(a6) instead of
#	being written to the destination, and d1 is cleared before returning
#	(presumably the "no error" status -- callers test d1 after reads).
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	beq.l		_dmem_write		# user; do the real write
	mov.l		0x0(%a0),FP_DST_EX(%a6)
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1
	rts
2370
2371#########################################################################
2372# XDEF ****************************************************************	#
2373#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2374#		     	effective address" exception.			#
2375#									#
2376#	This handler should be the first code executed upon taking the	#
2377#	FP Unimplemented Effective Address exception in an operating	#
2378#	system.								#
2379#									#
2380# XREF ****************************************************************	#
2381#	_imem_read_long() - read instruction longword			#
2382#	fix_skewed_ops() - adjust src operand in fsave frame		#
2383#	set_tag_x() - determine optype of src/dst operands		#
2384#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2385#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2386#	load_fpn2() - load dst operand from FP regfile			#
2387#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
2388#	decbin() - convert packed data to FP binary data		#
2389#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2390#	_real_access() - "callout" for access error exception		#
2391#	_mem_read() - read extended immediate operand from memory	#
2392#	_fpsp_done() - "callout" for exit; work all done		#
2393#	_real_trace() - "callout" for Trace enabled exception		#
2394#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2395#	fmovm_ctrl() - emulate fmovm control instruction		#
2396#									#
2397# INPUT ***************************************************************	#
2398#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2399# 									#
2400# OUTPUT **************************************************************	#
2401#	If access error:						#
2402#	- The system stack is changed to an access error stack frame	#
2403#	If FPU disabled:						#
2404#	- The system stack is changed to an FPU disabled stack frame	#
2405#	If Trace exception enabled:					#
2406#	- The system stack is changed to a Trace exception stack frame	#
2407#	Else: (normal case)						#
2408#	- None (correct result has been stored as appropriate)		#
2409#									#
2410# ALGORITHM ***********************************************************	#
2411#	This exception handles 3 types of operations:			#
2412# (1) FP Instructions using extended precision or packed immediate	#
2413#     addressing mode.							#
2414# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2415# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2416#									#
2417#	For immediate data operations, the data is read in w/ a		#
2418# _mem_read() "callout", converted to FP binary (if packed), and used	#
2419# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the  	#
2421# emulation, then the result is stored to the destination register and	#
2422# the handler exits through _fpsp_done(). If an enabled exc has been	#
2423# signalled as a result of emulation, then an fsave state frame		#
2424# corresponding to the FP exception type must be entered into the 060	#
2425# FPU before exiting. In either the enabled or disabled cases, we 	#
2426# must also check if a Trace exception is pending, in which case, we	#
2427# must create a Trace exception stack frame from the current exception	#
2428# stack frame. If no Trace is pending, we simply exit through		#
2429# _fpsp_done().								#
2430#	For "fmovm.x", call the routine fmovm_dynamic() which will 	#
2431# decode and emulate the instruction. No FP exceptions can be pending	#
2432# as a result of this operation emulation. A Trace exception can be	#
2433# pending, though, which means the current stack frame must be changed	#
2434# to a Trace stack frame and an exit made through _real_trace().	#
2435# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2436# was executed from supervisor mode, this handler must store the FP	#
2437# register file values to the system stack by itself since		#
2438# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
2440#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2441# Again, a Trace exception may be pending and an exit made through	#
2442# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2443#									#
2444#	Before any of the above is attempted, it must be checked to	#
2445# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2446# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2447# has higher priority, we check the disabled bit in the PCR. If set,	#
2448# then we must create an 8 word "FPU disabled" exception stack frame	#
2449# from the current 4 word exception stack frame. This includes 		#
2450# reproducing the effective address of the instruction to put on the 	#
2451# new stack frame.							#
2452#									#
2453# 	In the process of all emulation work, if a _mem_read()		#
2454# "callout" returns a failing result indicating an access error, then	#
2455# we must create an access error stack frame from the current stack	#
2456# frame. This information includes a faulting address and a fault-	#
2457# status-longword. These are created within this handler.		#
2458#									#
2459#########################################################################
2460
# _fpsp_effadd: entry point for the FP "Unimplemented effective address"
# exception. On entry sp points at the exception stack frame.
	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
# note: on the iea_disabled path the saved d0 is still on the stack.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

# the low word of d0 is the extension word; its top bit set selects fmovem.
	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes
# otherwise fall through (past the comment block) into iea_op.
2491
2492#
2493# here, we will have:
2494# 	fabs	fdabs	fsabs		facos		fmod
2495#	fadd	fdadd	fsadd		fasin		frem
2496# 	fcmp				fatan		fscale
2497#	fdiv	fddiv	fsdiv		fatanh		fsin
2498#	fint				fcos		fsincos
2499#	fintrz				fcosh		fsinh
2500#	fmove	fdmove	fsmove		fetox		ftan
2501# 	fmul	fdmul	fsmul		fetoxm1		ftanh
2502#	fneg	fdneg	fsneg		fgetexp		ftentox
2503#	fsgldiv				fgetman		ftwotox
2504# 	fsglmul				flog10
2505# 	fsqrt				flog2
2506#	fsub	fdsub	fssub		flogn
2507#	ftst				flognp1
2508# which can all use f<op>.{x,p}
2509# so, now it's immediate data extended precision AND PACKED FORMAT!
2510#
# iea_op: immediate-operand FP operation (extended or packed source).
# On entry d0 = OPWORD:EXTWORD as fetched above, and EXC_EXTWPTR points at
# the immediate data that follows the two instruction words.
iea_op:
# clear the top byte and the exception byte of the saved FPSR.
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	btst		&0xa,%d0		# is src fmt x or p?
	bne.b		iea_op_pack		# packed


# extended precision immediate: read the 12 bytes straight into FP_SRC.
	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read extended immediate

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

	bra.b		iea_op_setsrc
2527
# iea_op_pack: packed immediate source. Read the 12 bytes into FP_SRC and,
# unless the operand is an INF, a NAN or a ZERO (which are left as read),
# convert it to extended precision with decbin.
iea_op_pack:

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read packed operand

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	beq.b		iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b		iea_op_setsrc		# operand is a ZERO
iea_op_gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
# the converted value is taken from fp0 and overwrites the packed image.
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
# fall through into iea_op_setsrc.
2556
# iea_op_setsrc: the 12-byte immediate has been consumed; step the
# extension word pointer past it and tag the source operand.
iea_op_setsrc:
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer

# FP_SRC now holds the src operand.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_getdst		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,STAG(%a6)		# set new optype tag
# fall through into iea_op_getdst.
# iea_op_getdst: decide whether a destination operand is needed.
# STORE_FLG is a boolean: zero means "store the result", non-zero (set in
# iea_op_spec) means the operation stores no result.
iea_op_getdst:
	clr.b		STORE_FLG(%a6)		# clear "store result" boolean

	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		iea_op_extract		# monadic
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
	bne.b		iea_op_spec		# yes

# dyadic: load the destination register into FP_DST and tag it.
iea_op_loaddst:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		load_fpn2		# load dst operand

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_extract		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,DTAG(%a6)		# set new optype tag
	bra.b		iea_op_extract
2588
# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
	beq.b		iea_op_extract		# yes
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
# store a result. then, only fcmp will branch back and pick up a dst operand.
	st		STORE_FLG(%a6)		# don't store a final result
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
	beq.b		iea_op_loaddst		# yes
# ftst: fall through into iea_op_extract with no dst operand loaded.
2598
# iea_op_extract: set up the arguments and dispatch to the emulation
# routine through tbl_unsupp:
#	d0 = rnd mode/prec byte (zero-extended)
#	d1 = low 7 bits of the command word (table index)
#	a0 = ptr to src operand, a1 = ptr to dst operand
iea_op_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# tbl_unsupp holds one longword per extension value; each entry is used as
# an offset relative to tbl_unsupp itself.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all operations
#	OPERR	: all reg-reg or mem-reg operations that can normally operr
#	OVFL	: same as OPERR
#	UNFL	: same as OPERR
#	DZ	: same as OPERR
#	INEX2	: same as OPERR
#	INEX1	: all packed immediate operations
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.b		iea_op_ena		# some are enabled
# none enabled: fall through into iea_op_save.
2631
# now, we save the result, unless, of course, the operation was ftst or fcmp.
# these don't save results.
iea_op_save:
	tst.b		STORE_FLG(%a6)		# does this op store a result?
	bne.b		iea_op_exit1		# exit with no frestore

iea_op_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		store_fpreg		# store the result

# iea_op_exit1: exit without an frestore. The saved FPIAR image and the PC
# in the exception frame are fixed up before the registers are restored.
iea_op_exit1:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel the frame

# after unlk, (%sp) is the first byte of the exception frame; bit 7 of that
# byte is tested as the trace flag.
	btst		&0x7,(%sp)		# is trace on?
	bne.w		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os
2656
# iea_op_ena: at least one exception is enabled (d0 = FPCR_ENABLE byte).
# mask it with the exceptions the emulation routine actually set and pick
# the highest priority one, if any.
2657iea_op_ena:
2658	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
2659	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2660	bne.b		iea_op_exc		# at least one was set
2661
2662# no exception occurred. now, did a disabled, exact overflow occur with inexact
2663# enabled? if so, then we have to stuff an overflow frame into the FPU.
2664	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665	beq.b		iea_op_save		# no; take the normal save/exit path
2666
2667iea_op_ovfl:
2668	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669	beq.b		iea_op_store		# no
2670	bra.b		iea_op_exc_ovfl		# yes
2671
2672# an enabled exception occurred. we have to insert the exception type back into
2673# the machine. on entry d0 holds the bfffo result from iea_op_ena.
2674iea_op_exc:
2675	subi.l		&24,%d0			# fix offset to be 0-7
2676	cmpi.b		%d0,&0x6		# is exception INEX?
2677	bne.b		iea_op_exc_force	# no
2678
2679# the enabled exception was inexact. so, if it occurs with an overflow
2680# or underflow that was disabled, then we have to force an overflow or
2681# underflow frame.
2682	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683	bne.b		iea_op_exc_ovfl		# yes
2684	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685	bne.b		iea_op_exc_unfl		# yes
2686
2687iea_op_exc_force:
2688	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2689	bra.b		iea_op_exit2		# exit with frestore
2690
# one word per exception offset 0-7 (indexed by d0 above). the selected word
# is written to 2+FP_SRC(%a6), which iea_op_exit2 hands to frestore.
2691tbl_iea_except:
2692	short		0xe002, 0xe006, 0xe004, 0xe005
2693	short		0xe003, 0xe002, 0xe001, 0xe001
2694
2695iea_op_exc_ovfl:
2696	mov.w		&0xe005,2+FP_SRC(%a6)	# same word as table entry 3
2697	bra.b		iea_op_exit2
2698
2699iea_op_exc_unfl:
2700	mov.w		&0xe003,2+FP_SRC(%a6)	# same word as table entry 4
2701
# exit path for an enabled exception: identical to iea_op_exit1 except that
# the frame at FP_SRC is frestore'd before the stack frame is unlinked.
2702iea_op_exit2:
2703	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2705
2706	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2707	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2709
2710	frestore 	FP_SRC(%a6)		# restore exceptional state
2711
2712	unlk		%a6			# unravel the frame
2713
2714	btst		&0x7,(%sp)		# is trace on?
2715	bne.b		iea_op_trace		# yes
2716
2717	bra.l		_fpsp_done		# exit to os
2718
2719#
2720# The opclass two instruction that took an "Unimplemented Effective Address"
2721# exception was being traced. Make the "current" PC the FPIAR and put it in
2722# the trace stack frame then jump to _real_trace().
2723#
2724#		 UNIMP EA FRAME		   TRACE FRAME
2725#		*****************	*****************
2726#		* 0x0 *  0x0f0	*	*    Current	*
2727#		*****************	*      PC	*
2728#		*    Current	*	*****************
2729#		*      PC	*	* 0x2 *  0x024	*
2730#		*****************	*****************
2731#		*      SR	*	*     Next	*
2732#		*****************	*      PC	*
2733#					*****************
2734#					*      SR	*
2735#					*****************
# both callers (iea_op_exit1, iea_op_exit2) have already unlinked a6 and
# written the "Next PC" into the frame's PC field, so (%sp) is the stacked SR.
2736iea_op_trace:
2737	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2738	mov.w		0x8(%sp),0x4(%sp)	# move down the low word of the PC
2739	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2740	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2741
2742	bra.l		_real_trace
2743
2744#########################################################################
# iea_fmovm: emulate an fmovm that took the exception.
# bit 14 of d0 clear selects the control-register form (iea_fmovm_ctrl);
# otherwise this is the data-register form, handled separately for user and
# supervisor mode because the a7 in use differs.
# NOTE(review): d0 is loaded before this chunk; presumably it holds the
# fetched opword/extension word -- confirm against the caller.
2745iea_fmovm:
2746	btst		&14,%d0			# ctrl or data reg
2747	beq.w		iea_fmovm_ctrl		# bit clear: ctrl reg form
2748
2749iea_fmovm_data:
2750
2751	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2752	bne.b		iea_fmovm_data_s	# supervisor
2753
# user mode: a7 is the usp. hand it to fmovm_dynamic through EXC_A7 and
# write back whatever value is there afterwards.
2754iea_fmovm_data_u:
2755	mov.l		%usp,%a0
2756	mov.l		%a0,EXC_A7(%a6)		# store current a7
2757	bsr.l		fmovm_dynamic		# do dynamic fmovm
2758	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2759	mov.l		%a0,%usp		# update usp
2760	bra.w		iea_fmovm_exit
2761
# supervisor mode: a7 is the system stack that also holds the exception
# frame. EXC_A7 is given the address just past the frame's format/offset
# word. after fmovm_dynamic, SPCOND_FLG tells whether the <ea> was -(a7)
# (mda7_flg) or matched mia7_flg (handled below as the post-increment case);
# any other value takes the common exit.
2762iea_fmovm_data_s:
2763	clr.b		SPCOND_FLG(%a6)		# clear special case flag
2764	lea		0x2+EXC_VOFF(%a6),%a0	# address following the exc frame
2765	mov.l		%a0,EXC_A7(%a6)		# store it as the current a7
2766	bsr.l		fmovm_dynamic		# do dynamic fmovm
2767
2768	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2769	beq.w		iea_fmovm_data_predec
2770	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2771	bne.w		iea_fmovm_exit
2772
2773# right now, d0 = the size.
2774# the data has been fetched from the supervisor stack, but we have not
2775# incremented the stack pointer by the appropriate number of bytes.
2776# do it here.
# the exception frame is rebuilt d0 bytes higher in memory, the address of
# the rebuilt frame is left in the old EXC_SR slot, and after unlk that
# address is popped into sp.
2777iea_fmovm_data_postinc:
2778	btst		&0x7,EXC_SR(%a6)	# is trace on?
2779	bne.b		iea_fmovm_data_pi_trace	# yes
2780
2781	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR
2782	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # PC field = EXC_EXTWPTR
2783	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# format/offset word 0x00f0
2784
2785	lea		(EXC_SR,%a6,%d0),%a0	# address of the rebuilt frame
2786	mov.l		%a0,EXC_SR(%a6)		# leave it where sp will point
2787
2788	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2789	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
2791
2792	unlk		%a6
2793	mov.l		(%sp)+,%sp		# sp = address stored above
2794	bra.l		_fpsp_done
2795
# same as above, but a trace frame (format/offset word 0x2024, which has one
# extra longword holding EXC_PC) is built 4 bytes lower and the exit is
# through _real_trace().
2796iea_fmovm_data_pi_trace:
2797	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2801
2802	lea		(EXC_SR-0x4,%a6,%d0),%a0	# address of the rebuilt frame
2803	mov.l		%a0,EXC_SR(%a6)		# leave it where sp will point
2804
2805	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2806	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
2808
2809	unlk		%a6
2810	mov.l		(%sp)+,%sp		# sp = address stored above
2811	bra.l		_real_trace
2812
2813# right now, d0 = size and d1 = the strg.
# the <ea> was -(a7) in supervisor mode, so the registers must be written
# where the exception frame currently sits. the frame is rebuilt "size"
# bytes lower, then each register selected in strg (msb first) is stored
# with its own fmovm.x, 0xc bytes apart, above the rebuilt frame.
# size and strg are parked in the two bytes of EXC_VOFF because d0/d1 are
# about to be reloaded with the user's values.
2814iea_fmovm_data_predec:
2815	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2816	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2817
2818	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2819	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
2821
2822	mov.l		(%a6),-(%sp)		# make a copy of a6
2823	mov.l		%d0,-(%sp)		# save d0
2824	mov.l		%d1,-(%sp)		# save d1
2825	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2826
2827	clr.l		%d0
2828	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2829	neg.l		%d0			# get negative of size
2830
2831	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2832	beq.b		iea_fmovm_data_p2
2833
# trace enabled: build a trace frame (format/offset word 0x2024), which is
# 4 bytes longer than the original frame.
2834	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2835	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2836	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)	# pop Next PC into PC field
2837	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2838
2839	pea		(%a6,%d0)		# create final sp
2840	bra.b		iea_fmovm_data_p3
2841
# trace disabled: rebuild the same kind of frame (format/offset word 0x00f0).
2842iea_fmovm_data_p2:
2843	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2844	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# pop Next PC into PC field
2845	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2846
2847	pea		(0x4,%a6,%d0)		# create final sp
2848
# walk strg one bit at a time from bit 7 down; each set bit stores the
# register selected by the matching fmovm.x mask and advances d0 by 0xc.
2849iea_fmovm_data_p3:
2850	clr.l		%d1
2851	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2852
2853	tst.b		%d1
2854	bpl.b		fm_1
2855	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
2856	addi.l		&0xc,%d0
2857fm_1:
2858	lsl.b		&0x1,%d1
2859	bpl.b		fm_2
2860	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
2861	addi.l		&0xc,%d0
2862fm_2:
2863	lsl.b		&0x1,%d1
2864	bpl.b		fm_3
2865	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
2866	addi.l		&0xc,%d0
2867fm_3:
2868	lsl.b		&0x1,%d1
2869	bpl.b		fm_4
2870	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
2871	addi.l		&0xc,%d0
2872fm_4:
2873	lsl.b		&0x1,%d1
2874	bpl.b		fm_5
2875	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
2876	addi.l		&0xc,%d0
2877fm_5:
2878	lsl.b		&0x1,%d1
2879	bpl.b		fm_6
2880	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
2881	addi.l		&0xc,%d0
2882fm_6:
2883	lsl.b		&0x1,%d1
2884	bpl.b		fm_7
2885	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
2886	addi.l		&0xc,%d0
2887fm_7:
2888	lsl.b		&0x1,%d1
2889	bpl.b		fm_end
2890	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# stack now holds, from (%sp) up: final sp, saved d1, saved d0, copy of a6.
2891fm_end:
2892	mov.l		0x4(%sp),%d1		# reload saved d1
2893	mov.l		0x8(%sp),%d0		# reload saved d0
2894	mov.l		0xc(%sp),%a6		# reload copy of a6
2895	mov.l		(%sp)+,%sp		# switch to the final sp
2896
2897	btst		&0x7,(%sp)		# is trace enabled?
2898	beq.l		_fpsp_done
2899	bra.l		_real_trace
2900
2901#########################################################################
# iea_fmovm_ctrl: control-register form of fmovm; fmovm_ctrl does the load.
# iea_fmovm_exit is the common exit for the fmovm paths that need no stack
# relocation: restore registers, then leave through _fpsp_done() or, if the
# stacked SR has the trace bit set, through iea_fmovm_trace.
2902iea_fmovm_ctrl:
2903
2904	bsr.l		fmovm_ctrl		# load ctrl regs
2905
2906iea_fmovm_exit:
2907	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2908	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2910
2911	btst		&0x7,EXC_SR(%a6)	# is trace on?
2912	bne.b		iea_fmovm_trace		# yes
2913
2914	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2915
2916	unlk		%a6			# unravel the frame
2917
2918	bra.l		_fpsp_done		# exit to os
2919
2920#
2921# The control reg instruction that took an "Unimplemented Effective Address"
2922# exception was being traced. The "Current PC" for the trace frame is the
2923# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924# After fixing the stack frame, jump to _real_trace().
2925#
2926#		 UNIMP EA FRAME		   TRACE FRAME
2927#		*****************	*****************
2928#		* 0x0 *  0x0f0	*	*    Current	*
2929#		*****************	*      PC	*
2930#		*    Current	*	*****************
2931#		*      PC	*	* 0x2 *  0x024	*
2932#		*****************	*****************
2933#		*      SR	*	*     Next	*
2934#		*****************	*      PC	*
2935#					*****************
2936#					*      SR	*
2937#					*****************
2938# this ain't a pretty solution, but it works:
2939# -restore a6 (not with unlk)
2940# -shift stack frame down over where old a6 used to be
2941# -add LOCAL_SIZE to stack pointer
# (all frame fields below are addressed off sp, since a6 is reloaded first.)
2942iea_fmovm_trace:
2943	mov.l		(%a6),%a6		# restore frame pointer
2944	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR
2945	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# "Current PC"
2946	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# "Next PC"
2947	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2949
2950	bra.l		_real_trace
2951
2952#########################################################################
2953# The FPU is disabled and so we should really have taken the "Line
2954# F Emulator" exception. So, here we create an 8-word stack frame
2955# from our 4-word stack frame. This means we must calculate the length
2956# of the faulting instruction to get the "next PC". This is trivial for
2957# immediate operands but requires some extra work for fmovm dynamic
2958# which can use most addressing modes.
# the computed instruction length is parked in the frame's EXC_VOFF word
# across the unlk, then added to the current PC while the new frame is built.
# exits through _real_fpu_disabled().
2959iea_disabled:
2960	mov.l		(%sp)+,%d0		# restore d0
2961
2962	link		%a6,&-LOCAL_SIZE	# init stack frame
2963
2964	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2965
2966# PC of instruction that took the exception is the PC in the frame
2967	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2969	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2970	bsr.l		_imem_read_long		# fetch the instruction words
2971	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2972
2973	tst.w		%d0			# is instr fmovm?
2974	bmi.b		iea_dis_fmovm		# yes
2975# instruction is using an extended precision immediate operand. therefore,
2976# the total instruction length is 16 bytes.
2977iea_dis_immed:
2978	mov.l		&0x10,%d0		# 16 bytes of instruction
2979	bra.b		iea_dis_cont
2980iea_dis_fmovm:
2981	btst		&0xe,%d0		# is instr fmovm ctrl
2982	bne.b		iea_dis_fmovm_data	# no
2983# the instruction is a fmovm.l with 2 or 3 registers.
2984	bfextu		%d0{&19:&3},%d1		# extract 3-bit ctrl reg select
2985	mov.l		&0xc,%d0		# assume 12 bytes of instruction
2986	cmpi.b		%d1,&0x7		# move all regs?
2987	bne.b		iea_dis_cont
2988	addq.l		&0x4,%d0		# yes; 16 bytes of instruction
2989	bra.b		iea_dis_cont
2990# the instruction is an fmovm.x dynamic which can use many addressing
2991# modes and thus can have several different total instruction lengths.
2992# call fmovm_calc_ea which will go through the ea calc process and,
2993# as a by-product, will tell us how long the instruction is.
2994iea_dis_fmovm_data:
2995	clr.l		%d0
2996	bsr.l		fmovm_calc_ea
2997	mov.l		EXC_EXTWPTR(%a6),%d0
2998	sub.l		EXC_PC(%a6),%d0		# length = advanced ptr - current PC
2999iea_dis_cont:
3000	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
3001
3002	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3003
3004	unlk		%a6
3005
3006# here, we actually create the 8-word frame from the 4-word frame,
3007# with the "next PC" as additional info.
3008# the <ea> field is left undefined.
3009	subq.l		&0x8,%sp		# make room for new stack
3010	mov.l		%d0,-(%sp)		# save d0
3011	mov.w		0xc(%sp),0x4(%sp)	# move SR
3012	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3013	clr.l		%d0
3014	mov.w		0x12(%sp),%d0		# fetch length stored in old voff word
3015	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3016	add.l		%d0,0x6(%sp)		# make Next PC
3017	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3018	mov.l		(%sp)+,%d0		# restore d0
3019
3020	bra.l		_real_fpu_disabled
3021
3022##########
3023
# iea_iacc: grow the 4-word exception frame into an 8-word frame with
# format/offset word 0x4008, an <ea> field and an fslw longword, then leave
# through _real_access(). the <ea> field receives a copy of the stacked PC.
# the FP register restore is skipped when bit 1 of the PCR is set.
# NOTE(review): PCR bit 1 is presumably the FPU-disable bit -- confirm
# against the 68060 user's manual.
3024iea_iacc:
3025	movc		%pcr,%d0
3026	btst		&0x1,%d0		# PCR bit 1 set?
3027	bne.b		iea_iacc_cont		# yes; skip the FP restores
3028	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3030iea_iacc_cont:
3031	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3032
3033	unlk		%a6
3034
3035	subq.w		&0x8,%sp		# make stack frame bigger
3036	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3037	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3038	mov.w		&0x4008,0x6(%sp)	# store voff
3039	mov.l		0x2(%sp),0x8(%sp)	# store ea (copy of the stacked PC)
3040	mov.l		&0x09428001,0xc(%sp)	# store fslw
3041
# shared tail for iea_iacc and iea_dacc: sp points at the new frame.
3042iea_acc_done:
3043	btst		&0x5,(%sp)		# user or supervisor mode?
3044	beq.b		iea_acc_done2		# user
3045	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3046
3047iea_acc_done2:
3048	bra.l		_real_access
3049
# iea_dacc: like iea_iacc, but the 8-word frame is built while the local
# frame is still allocated, so every field is addressed as LOCAL_SIZE(%sp)
# plus its offset, shifted down by 0x8. the field layout matches the one
# built in iea_iacc: SR/PC, voff 0x4008, <ea>, fslw.
# a0 supplies the <ea> field and the low word of d0 supplies the upper fslw
# word; both are set by whoever branches here.
# NOTE(review): presumably a0 = faulting address, d0 = fslw bits -- confirm
# against the callers, which are outside this chunk.
3050iea_dacc:
3051	lea		-LOCAL_SIZE(%a6),%sp	# reset sp to the base of the local frame
3052
3053	movc		%pcr,%d1
3054	btst		&0x1,%d1		# PCR bit 1 set?
3055	bne.b		iea_dacc_cont		# yes; skip the FP restores
3056	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3057	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058iea_dacc_cont:
3059	mov.l		(%a6),%a6		# restore frame pointer
3060
3061	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)	# SR,hi(PC)
3062	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)	# lo(PC)
3063	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)		# voff
3064	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)			# ea
3065	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)			# fslw, upper word
3066	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)		# fslw, lower word
3067
3068	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069	add.w		&LOCAL_SIZE-0x4,%sp	# sp = start of the new frame
3070
3071	bra.b		iea_acc_done
3072
3073#########################################################################
3074# XDEF ****************************************************************	#
3075#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3076#									#
3077#	This handler should be the first code executed upon taking the	#
3078# 	FP Operand Error exception in an operating system.		#
3079#									#
3080# XREF ****************************************************************	#
3081#	_imem_read_long() - read instruction longword			#
3082#	fix_skewed_ops() - adjust src operand in fsave frame		#
3083#	_real_operr() - "callout" to operating system operr handler	#
3084#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3085#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3086#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3087#									#
3088# INPUT ***************************************************************	#
3089#	- The system stack contains the FP Operr exception frame	#
3090#	- The fsave frame contains the source operand			#
3091# 									#
3092# OUTPUT **************************************************************	#
3093#	No access error:						#
3094#	- The system stack is unchanged					#
3095#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3096#									#
3097# ALGORITHM ***********************************************************	#
3098#	In a system where the FP Operr exception is enabled, the goal	#
3099# is to get to the handler specified at _real_operr(). But, on the 060,	#
3100# for opclass zero and two instruction taking this exception, the 	#
3101# input operand in the fsave frame may be incorrect for some cases	#
3102# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3103# do just this and then exits through _real_operr().			#
3104#	For opclass 3 instructions, the 060 doesn't store the default	#
3105# operr result out to memory or data register file as it should.	#
3106# This code must emulate the move out before finally exiting through	#
3107# _real_operr(). The move out, if to memory, is performed using 	#
3108# _mem_write() "callout" routines that may return a failing result.	#
3109# In this special case, the handler must exit through facc_out() 	#
3110# which creates an access error stack frame from the current operr	#
3111# stack frame.								#
3112#									#
3113#########################################################################
3114
# _fpsp_operr: entry point for the FP Operand Error exception.
# saves the fsave frame and the scratch registers, fetches the faulting
# instruction through FPIAR, and then either emulates the fmove out
# (foperr_out) or fixes up the source operand in the fsave frame.
# both paths leave through foperr_exit -> _real_operr(), except when the
# emulated store fails, in which case the facc_out_{b,w,l} routines take over.
3115	global		_fpsp_operr
3116_fpsp_operr:
3117
3118	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3119
3120	fsave		FP_SRC(%a6)		# grab the "busy" frame
3121
3122 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3123	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3125
3126# the FPIAR holds the "current PC" of the faulting instruction
3127	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3128
3129	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3130	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3131	bsr.l		_imem_read_long		# fetch the instruction words
3132	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword
3133
3134##############################################################################
3135
3136	btst		&13,%d0			# is instr an fmove out?
3137	bne.b		foperr_out		# fmove out
3138
3139
3140# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141# this would be the case for opclass two operations with a source infinity or
3142# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143# cause an operr so we don't need to check for them here.
3144	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3145	bsr.l		fix_skewed_ops		# fix src op
3146
# common exit: restore everything saved on entry, hand the (possibly
# corrected) fsave frame back to the FPU, and go to the OS handler.
3147foperr_exit:
3148	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3149	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3151
3152	frestore	FP_SRC(%a6)
3153
3154	unlk		%a6
3155	bra.l		_real_operr
3156
3157########################################################################
3158
3159#
3160# the hardware does not save the default result to memory on enabled
3161# operand error exceptions. we do this here before passing control to
3162# the user operand error handler.
3163#
3164# byte, word, and long destination format operations can pass
3165# through here. we simply need to test the sign of the src
3166# operand and save the appropriate minimum or maximum integer value
3167# to the effective address as pointed to by the stacked effective address.
# if the src operand is a NAN (maximum exponent, non-zero mantissa below the
# top mantissa bit), the upper mantissa longword is used as the result instead.
# the chosen longword is kept in L_SCR1 for the size-specific store routines.
3168#
3169# although packed opclass three operations can take operand error
3170# exceptions, they won't pass through here since they are caught
3171# first by the unsupported data format exception handler. that handler
3172# sends them directly to _real_operr() if necessary.
3173#
# on entry d0 still holds the instruction longword fetched in _fpsp_operr.
3174foperr_out:
3175
3176	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3177	andi.w		&0x7fff,%d1		# strip the sign
3178	cmpi.w		%d1,&0x7fff		# maximum exponent?
3179	bne.b		foperr_out_not_qnan	# no
3180# the operand is either an infinity or a QNAN.
3181	tst.l		FP_SRC_LO(%a6)		# any lower mantissa bits set?
3182	bne.b		foperr_out_qnan		# yes; it's a NAN
3183	mov.l		FP_SRC_HI(%a6),%d1
3184	andi.l		&0x7fffffff,%d1		# ignore the top mantissa bit
3185	beq.b		foperr_out_not_qnan	# rest is zero; it's an infinity
3186foperr_out_qnan:
3187	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)	# result = upper mantissa longword
3188	bra.b		foperr_out_jmp
3189
3190foperr_out_not_qnan:
3191	mov.l		&0x7fffffff,%d1		# result for a positive src
3192	tst.b		FP_SRC_EX(%a6)		# is the src negative?
3193	bpl.b		foperr_out_not_qnan2	# no
3194	addq.l		&0x1,%d1		# yes; result becomes 0x80000000
3195foperr_out_not_qnan2:
3196	mov.l		%d1,L_SCR1(%a6)
3197
3198foperr_out_jmp:
3199	bfextu		%d0{&19:&3},%d0		# extract dst format field
3200	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3201	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0	# fetch offset of the store routine
3202	jmp		(tbl_operr.b,%pc,%a0)
3203
# word offsets from tbl_operr, indexed by the 3-bit dst format field.
3204tbl_operr:
3205	short		foperr_out_l - tbl_operr # long word integer
3206	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3207	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3208	short		foperr_exit  - tbl_operr # packed won't enter here
3209	short		foperr_out_w - tbl_operr # word integer
3210	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3211	short		foperr_out_b - tbl_operr # byte integer
3212	short		tbl_operr    - tbl_operr # packed won't enter here
3213
# foperr_out_{b,w,l}: store the default result chosen in foperr_out.
# on entry d1 holds the <ea> mode,reg byte of the opword. a value of 7 or
# less selects the data-register path (regno = low 3 bits); otherwise the
# result is written to the stacked <ea> through the _dmem_write_* callout,
# and a non-zero d1 on return sends control to the matching facc_out_*.
# the result is read from the start of L_SCR1 at the destination size.
3214foperr_out_b:
3215	mov.b		L_SCR1(%a6),%d0		# load default result (byte)
3216	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3217	ble.b		foperr_out_b_save_dn	# yes
3218	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3219	bsr.l		_dmem_write_byte	# write the default result
3220
3221	tst.l		%d1			# did dstore fail?
3222	bne.l		facc_out_b		# yes
3223
3224	bra.w		foperr_exit
3225foperr_out_b_save_dn:
3226	andi.w		&0x0007,%d1		# keep the register number
3227	bsr.l		store_dreg_b		# store result to regfile
3228	bra.w		foperr_exit
3229
3230foperr_out_w:
3231	mov.w		L_SCR1(%a6),%d0		# load default result (word)
3232	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3233	ble.b		foperr_out_w_save_dn	# yes
3234	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3235	bsr.l		_dmem_write_word	# write the default result
3236
3237	tst.l		%d1			# did dstore fail?
3238	bne.l		facc_out_w		# yes
3239
3240	bra.w		foperr_exit
3241foperr_out_w_save_dn:
3242	andi.w		&0x0007,%d1		# keep the register number
3243	bsr.l		store_dreg_w		# store result to regfile
3244	bra.w		foperr_exit
3245
3246foperr_out_l:
3247	mov.l		L_SCR1(%a6),%d0		# load default result (longword)
3248	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3249	ble.b		foperr_out_l_save_dn	# yes
3250	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3251	bsr.l		_dmem_write_long	# write the default result
3252
3253	tst.l		%d1			# did dstore fail?
3254	bne.l		facc_out_l		# yes
3255
3256	bra.w		foperr_exit
3257foperr_out_l_save_dn:
3258	andi.w		&0x0007,%d1		# keep the register number
3259	bsr.l		store_dreg_l		# store result to regfile
3260	bra.w		foperr_exit
3261
3262#########################################################################
3263# XDEF ****************************************************************	#
3264#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3265#									#
3266#	This handler should be the first code executed upon taking the	#
3267# 	FP Signalling NAN exception in an operating system.		#
3268#									#
3269# XREF ****************************************************************	#
3270#	_imem_read_long() - read instruction longword			#
3271#	fix_skewed_ops() - adjust src operand in fsave frame		#
3272#	_real_snan() - "callout" to operating system SNAN handler	#
3273#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3274#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3275#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3276#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3277#									#
3278# INPUT ***************************************************************	#
3279#	- The system stack contains the FP SNAN exception frame		#
3280#	- The fsave frame contains the source operand			#
3281# 									#
3282# OUTPUT **************************************************************	#
3283#	No access error:						#
3284#	- The system stack is unchanged					#
3285#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3286#									#
3287# ALGORITHM ***********************************************************	#
3288#	In a system where the FP SNAN exception is enabled, the goal	#
3289# is to get to the handler specified at _real_snan(). But, on the 060,	#
3290# for opclass zero and two instructions taking this exception, the 	#
3291# input operand in the fsave frame may be incorrect for some cases	#
3292# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3293# do just this and then exits through _real_snan().			#
3294#	For opclass 3 instructions, the 060 doesn't store the default	#
3295# SNAN result out to memory or data register file as it should.		#
3296# This code must emulate the move out before finally exiting through	#
3297# _real_snan(). The move out, if to memory, is performed using 		#
3298# _mem_write() "callout" routines that may return a failing result.	#
3299# In this special case, the handler must exit through facc_out() 	#
3300# which creates an access error stack frame from the current SNAN	#
3301# stack frame.								#
3302#	For the case of an extended precision opclass 3 instruction,	#
3303# if the effective addressing mode was -() or ()+, then the address	#
3304# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3305# was -(a7) from supervisor mode, then the exception frame currently	#
3306# on the system stack must be carefully moved "down" to make room	#
3307# for the operand being moved.						#
3308#									#
3309#########################################################################
3310
# _fpsp_snan: entry point for the FP Signalling NAN exception.
# saves the fsave frame and the scratch registers, fetches the faulting
# instruction through FPIAR, and then either emulates the fmove out
# (fsnan_out) or fixes up the source operand in the fsave frame before
# leaving through fsnan_exit -> _real_snan().
3311	global		_fpsp_snan
3312_fpsp_snan:
3313
3314	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3315
3316	fsave		FP_SRC(%a6)		# grab the "busy" frame
3317
3318 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3319	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3321
3322# the FPIAR holds the "current PC" of the faulting instruction
3323	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3324
3325	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3326	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3327	bsr.l		_imem_read_long		# fetch the instruction words
3328	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword
3329
3330##############################################################################
3331
3332	btst		&13,%d0			# is instr an fmove out?
3333	bne.w		fsnan_out		# fmove out
3334
3335
3336# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337# this would be the case for opclass two operations with a source infinity or
3338# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339# fixed here.
3340	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3341	bsr.l		fix_skewed_ops		# fix src op
3342
# common exit: restore everything saved on entry, hand the (possibly
# corrected) fsave frame back to the FPU, and go to the OS handler.
3343fsnan_exit:
3344	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3345	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3347
3348	frestore	FP_SRC(%a6)
3349
3350	unlk		%a6
3351	bra.l		_real_snan
3352
3353########################################################################
3354
3355#
3356# the hardware does not save the default result to memory on enabled
3357# snan exceptions. we do this here before passing control to
3358# the user snan handler.
3359#
3360# all destination formats can pass through here (see tbl_snan below).
3361# since packed format operations already were handled by
3362# fpsp_unsupp(), then we need to do nothing else for them here.
3363# for byte, word, and long, we store the upper byte/word/longword of the
3364# SNAN mantissa, with the SNAN bit set, to the stacked effective address
3365# or to the destination data register. single, double, and extended
# destinations get a NAN rebuilt in that format with the SNAN bit set.
3366#
# on entry d0 still holds the instruction longword fetched in _fpsp_snan.
3367fsnan_out:
3368
3369	bfextu		%d0{&19:&3},%d0		# extract dst format field
3370	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3371	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0	# fetch offset of the store routine
3372	jmp		(tbl_snan.b,%pc,%a0)
3373
# word offsets from tbl_snan, indexed by the 3-bit dst format field.
3374tbl_snan:
3375	short		fsnan_out_l - tbl_snan # long word integer
3376	short		fsnan_out_s - tbl_snan # single precision
3377	short		fsnan_out_x - tbl_snan # extended precision
3378	short		tbl_snan    - tbl_snan # packed needs no help
3379	short		fsnan_out_w - tbl_snan # word integer
3380	short		fsnan_out_d - tbl_snan # double precision
3381	short		fsnan_out_b - tbl_snan # byte integer
3382	short		tbl_snan    - tbl_snan # packed needs no help
3383
# fsnan_out_{b,w,l}: store the leading byte/word/longword of the SNAN's
# upper mantissa with the SNAN bit (second-highest bit of the datum) set.
# on entry d1 holds the <ea> mode,reg byte of the opword. a value of 7 or
# less selects the data-register path (regno = low 3 bits); otherwise the
# value is written to the stacked <ea> through the _dmem_write_* callout,
# and a non-zero d1 on return sends control to the matching facc_out_*.
3384fsnan_out_b:
3385	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3386	bset		&6,%d0			# set SNAN bit
3387	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3388	ble.b		fsnan_out_b_dn		# yes
3389	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3390	bsr.l		_dmem_write_byte	# write the default result
3391
3392	tst.l		%d1			# did dstore fail?
3393	bne.l		facc_out_b		# yes
3394
3395	bra.w		fsnan_exit
3396fsnan_out_b_dn:
3397	andi.w		&0x0007,%d1		# keep the register number
3398	bsr.l		store_dreg_b		# store result to regfile
3399	bra.w		fsnan_exit
3400
3401fsnan_out_w:
3402	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3403	bset		&14,%d0			# set SNAN bit
3404	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3405	ble.b		fsnan_out_w_dn		# yes
3406	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3407	bsr.l		_dmem_write_word	# write the default result
3408
3409	tst.l		%d1			# did dstore fail?
3410	bne.l		facc_out_w		# yes
3411
3412	bra.w		fsnan_exit
3413fsnan_out_w_dn:
3414	andi.w		&0x0007,%d1		# keep the register number
3415	bsr.l		store_dreg_w		# store result to regfile
3416	bra.w		fsnan_exit
3417
3418fsnan_out_l:
3419	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3420	bset		&30,%d0			# set SNAN bit
3421	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3422	ble.b		fsnan_out_l_dn		# yes
3423	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3424	bsr.l		_dmem_write_long	# write the default result
3425
3426	tst.l		%d1			# did dstore fail?
3427	bne.l		facc_out_l		# yes
3428
3429	bra.w		fsnan_exit
3430fsnan_out_l_dn:
3431	andi.w		&0x0007,%d1		# keep the register number
3432	bsr.l		store_dreg_l		# store result to regfile
3433	bra.w		fsnan_exit
3434
# fsnan_out_s: single precision destination.
# the value stored is: the SNAN's sign, 0x7fc00000 (exponent plus SNAN bit),
# and the upper mantissa longword shifted right by 8.
# on entry d1 holds the <ea> mode,reg byte; 7 or less means a data register.
# note: the data-register path is the label fsnan_out_d_dn below; despite
# its name it builds this same single precision value.
3435fsnan_out_s:
3436	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3437	ble.b		fsnan_out_d_dn		# yes
3438	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3439	andi.l		&0x80000000,%d0		# keep sign
3440	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3441	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3442	lsr.l		&0x8,%d1		# shift mantissa for sgl
3443	or.l		%d1,%d0			# create sgl SNAN
3444	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3445	bsr.l		_dmem_write_long	# write the default result
3446
3447	tst.l		%d1			# did dstore fail?
3448	bne.l		facc_out_l		# yes
3449
3450	bra.w		fsnan_exit
# data-register destination: d1 (mode,reg) is needed after the mantissa
# shift, so it is parked on the stack while d1 is used as scratch.
3451fsnan_out_d_dn:
3452	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3453	andi.l		&0x80000000,%d0		# keep sign
3454	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3455	mov.l		%d1,-(%sp)		# save <ea> mode,reg
3456	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3457	lsr.l		&0x8,%d1		# shift mantissa for sgl
3458	or.l		%d1,%d0			# create sgl SNAN
3459	mov.l		(%sp)+,%d1		# restore <ea> mode,reg
3460	andi.w		&0x0007,%d1		# keep the register number
3461	bsr.l		store_dreg_l		# store result to regfile
3462	bra.w		fsnan_exit
3463
# fsnan_out_d: double precision destination (always written to memory).
# the 8-byte value is assembled in FP_SCR0_EX/FP_SCR0_HI:
#   first longword  = sign | 0x7ff80000 | (upper mantissa >> 11)
#   second longword = (low 11 bits of upper mantissa, moved to the top)
#                     | (lower mantissa >> 11)
# and then written to the stacked <ea> with _dmem_write().
3464fsnan_out_d:
3465	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3466	andi.l		&0x80000000,%d0		# keep sign
3467	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3468	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3469	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3470	mov.l		&11,%d0			# load shift amt
3471	lsr.l		%d0,%d1			# hi mantissa >> 11
3472	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3473	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3474	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
3475	ror.l		%d0,%d1			# rotate them into the top 11 bits
3476	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3477	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3478	lsr.l		%d0,%d1			# lo mantissa >> 11
3479	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3480	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3481	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3482	movq.l		&0x8,%d0		# pass: size of 8 bytes
3483	bsr.l		_dmem_write		# write the default result
3484
3485	tst.l		%d1			# did dstore fail?
3486	bne.l		facc_out_d		# yes
3487
3488	bra.w		fsnan_exit
3489
3490# for extended precision, if the addressing mode is pre-decrement or
3491# post-increment, then the address register did not get updated.
3492# in addition, for pre-decrement, the stacked <ea> is incorrect.
#
# fsnan_out_x: move-out of an SNAN to an extended precision destination.
# The 12-byte result is built in FP_SCR0 (source copied with bit 30 of the
# hi mantissa longword set), the effective address is recomputed with
# _calc_ea_fout(), and the result is written with _dmem_write().
# Supervisor-mode exceptions are handled separately at fsnan_out_x_s.
3493fsnan_out_x:
3494	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3495
3496	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)	# copy sign/exponent word
3497	clr.w		2+FP_SCR0(%a6)		# zero the word that follows it
3498	mov.l		FP_SRC_HI(%a6),%d0	# load hi mantissa
3499	bset		&30,%d0			# set bit 30 of hi mantissa
3500	mov.l		%d0,FP_SCR0_HI(%a6)	# store modified hi mantissa
3501	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)	# lo mantissa is unchanged
3502
3503	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3504	bne.b		fsnan_out_x_s		# yes
3505
# user mode: make the user's a7 and the longword at (%a6) visible to
# _calc_ea_fout() through EXC_A7/EXC_A6, then copy both back afterwards so
# that any address register update it made takes effect.
# NOTE(review): (%a6) is presumably the interrupted a6 saved by link - confirm.
3506	mov.l		%usp,%a0		# fetch user stack pointer
3507	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3508	mov.l		(%a6),EXC_A6(%a6)	# copy longword at (a6) to EXC_A6
3509
3510	bsr.l		_calc_ea_fout		# find the correct ea,update An
3511	mov.l		%a0,%a1			# pass: dst addr for _dmem_write
3512	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3513
3514	mov.l		EXC_A7(%a6),%a0
3515	mov.l		%a0,%usp		# restore user stack pointer
3516	mov.l		EXC_A6(%a6),(%a6)	# write EXC_A6 back to (a6)
3517
# common store path; also entered from fsnan_out_x_s with %a1 = dst addr.
3518fsnan_out_x_save:
3519	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3520	movq.l		&0xc,%d0		# pass: size of extended
3521	bsr.l		_dmem_write		# write the default result
3522
3523	tst.l		%d1			# did dstore fail?
3524	bne.l		facc_out_x		# yes
3525
3526	bra.w		fsnan_exit
3527
# fsnan_out_x_s: supervisor-mode variant of fsnan_out_x. The effective
# address is recomputed with _calc_ea_fout(); unless that routine flagged
# the -(a7) addressing mode in SPCOND_FLG, the store is finished by the
# shared code at fsnan_out_x_save. For -(a7) the destination is the
# supervisor stack itself, so the exception frame is moved down 0xc bytes
# and the 12-byte result is copied into the space it occupied before
# leaving through _real_snan.
3528fsnan_out_x_s:
3529	mov.l		(%a6),EXC_A6(%a6)	# copy longword at (a6) to EXC_A6
3530
3531	bsr.l		_calc_ea_fout		# find the correct ea,update An
3532	mov.l		%a0,%a1			# pass: dst addr for _dmem_write
3533	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3534
3535	mov.l		EXC_A6(%a6),(%a6)	# write EXC_A6 back to (a6)
3536
3537	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538	bne.b		fsnan_out_x_save	# no
3539
3540# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3542	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3544
3545	frestore	FP_SRC(%a6)
3546
# a6 is reloaded by hand (no unlk), so sp still points at the bottom of the
# local area and everything below is addressed as LOCAL_SIZE+offset(%sp).
3547	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3548
# slide the three frame longwords (at EXC_SR, EXC_PC+0x2, EXC_EA) down 0xc bytes
3549	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3552
# copy the 12-byte result from FP_SCR0 into the slots just vacated
3553	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3556
# NOTE(review): LOCAL_SIZE-0x8 presumably leaves sp at the relocated frame's
# SR word; this depends on the EXC_SR offset, which is not visible here.
3557	add.l		&LOCAL_SIZE-0x8,%sp	# drop the local area
3558
3559	bra.l		_real_snan
3560
3561#########################################################################
3562# XDEF ****************************************************************	#
3563#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3564#									#
3565#	This handler should be the first code executed upon taking the	#
3566# 	FP Inexact exception in an operating system.			#
3567#									#
3568# XREF ****************************************************************	#
3569#	_imem_read_long() - read instruction longword			#
3570#	fix_skewed_ops() - adjust src operand in fsave frame		#
3571#	set_tag_x() - determine optype of src/dst operands		#
3572#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3573#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3574#	load_fpn2() - load dst operand from FP regfile			#
3575#	smovcr() - emulate an "fmovcr" instruction			#
3576#	fout() - emulate an opclass 3 instruction			#
3577#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
3578#	_real_inex() - "callout" to operating system inexact handler	#
3579#									#
3580# INPUT ***************************************************************	#
3581#	- The system stack contains the FP Inexact exception frame	#
3582#	- The fsave frame contains the source operand			#
3583# 									#
3584# OUTPUT **************************************************************	#
3585#	- The system stack is unchanged					#
3586#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3587#									#
3588# ALGORITHM ***********************************************************	#
3589#	In a system where the FP Inexact exception is enabled, the goal	#
3590# is to get to the handler specified at _real_inex(). But, on the 060,	#
3591# for opclass zero and two instructions taking this exception, the	#
3592# hardware doesn't store the correct result to the destination FP	#
3593# register as did the '040 and '881/2. This handler must emulate the 	#
3594# instruction in order to get this value and then store it to the 	#
3595# correct register before calling _real_inex().				#
3596#	For opclass 3 instructions, the 060 doesn't store the default	#
3597# inexact result out to memory or data register file as it should.	#
3598# This code must emulate the move out by calling fout() before finally	#
3599# exiting through _real_inex().						#
3600#									#
3601#########################################################################
3602
# _fpsp_inex: entry point for the FP Inexact exception. Builds the local
# frame, saves the FPU state frame and the scratch registers, fetches the
# faulting instruction through FPIAR, and sends fmove-out instructions to
# finex_out. For all other instructions it repairs the long-integer source
# case described below and then falls through into finex_cont.
3603	global		_fpsp_inex
3604_fpsp_inex:
3605
3606	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3607
3608	fsave		FP_SRC(%a6)		# grab the "busy" frame
3609
3610 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3611	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3613
3614# the FPIAR holds the "current PC" of the faulting instruction
3615	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3616
3617	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3618	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3619	bsr.l		_imem_read_long		# fetch the instruction words
3620	mov.l		%d0,EXC_OPWORD(%a6)	# save opword and extension word
3621
3622##############################################################################
3623
3624	btst		&13,%d0			# is instr an fmove out?
3625	bne.w		finex_out		# fmove out
3626
3627
3628# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629# longword integer directly into the upper longword of the mantissa along
3630# w/ an exponent value of 0x401e. we convert this to extended precision here.
3631	bfextu		%d0{&19:&3},%d0		# fetch instr size
3632	bne.b		finex_cont		# instr size is not long
3633	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3634	bne.b		finex_cont		# no
3635	fmov.l		&0x0,%fpcr		# clear FPCR before converting
3636	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3637	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
# the 12-byte fmov.x above also overwrote the word at offset 2 of the
# frame, so 0xe001 is stored there again.
3638	mov.w		&0xe001,0x2+FP_SRC(%a6)
3639
# finex_cont: emulate the opclass 0/2 instruction that took the inexact
# exception. The source operand is unskewed and tagged, the destination is
# loaded and tagged for dyadic operations, and the emulation routine is
# called through tbl_unsupp. "fmovecr" is diverted to finex_fmovcr.
# Falls through into finex_save with the result in fp0.
3640finex_cont:
3641	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3642	bsr.l		fix_skewed_ops		# fix src op
3643
3644# Here, we zero the ccode and exception byte field since we're going to
3645# emulate the whole instruction. Notice, though, that we don't kill the
3646# INEX1 bit. This is because a packed op has long since been converted
3647# to extended before arriving here. Therefore, we need to retain the
3648# INEX1 bit from when the operand was first converted.
3649	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3650
3651	fmov.l		&0x0,%fpcr		# zero current control regs
3652	fmov.l		&0x0,%fpsr
3653
3654	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655	cmpi.b		%d1,&0x17		# is op an fmovecr?
3656	beq.w		finex_fmovcr		# yes
3657
3658	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3659	bsr.l		set_tag_x		# tag the operand type
3660	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3661
3662# bits four and five of the fp extension word separate the monadic and dyadic
3663# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664# will never take this exception, but fsincos will.
3665	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3666	beq.b		finex_extract		# monadic
3667
3668	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3669	bne.b		finex_extract		# yes
3670
3671	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672	bsr.l		load_fpn2		# load dst into FP_DST
3673
3674	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3675	bsr.l		set_tag_x		# tag the operand type
3676	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3677	bne.b		finex_op2_done		# no
3678	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3679finex_op2_done:
3680	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3681
# dispatch: d0 = rounding precision/mode, d1 = low 7 bits of the command
# word, a0/a1 = src/dst operand pointers. tbl_unsupp holds one longword
# per index; the value fetched is used as an offset from tbl_unsupp itself.
3682finex_extract:
3683	clr.l		%d0
3684	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3685
3686	mov.b		1+EXC_CMDREG(%a6),%d1
3687	andi.w		&0x007f,%d1		# extract extension
3688
3689	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3690	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
3691
3692	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call the emulation routine
3694
3695# the operation has been emulated. the result is in fp0.
# finex_save stores that result in the destination FP register named by
# bits 6-8 (bit-field offset 6, width 3) of the command word.
3696finex_save:
3697	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# fetch dst register number
3698	bsr.l		store_fpreg		# store result to fp regfile
3699
# finex_exit: common exit. Restores everything saved at entry, reloads the
# fsave frame, removes the local frame and continues at _real_inex.
3700finex_exit:
3701	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3702	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3704
3705	frestore	FP_SRC(%a6)
3706
3707	unlk		%a6
3708	bra.l		_real_inex
3709
# finex_fmovcr: the instruction was "fmovecr". Call smovcr() with the
# rounding precision/mode in d0 and the low 7 bits of the command word
# (the constant ROM offset) in d1, then store the result via finex_save.
3710finex_fmovcr:
3711	clr.l		%d0
3712	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3713	mov.b		1+EXC_CMDREG(%a6),%d1
3714	andi.l		&0x0000007f,%d1		# pass rom offset
3715	bsr.l		smovcr
3716	bra.b		finex_save
3717
3718########################################################################
3719
3720#
3721# the hardware does not save the default result to memory on enabled
3722# inexact exceptions. we do this here before passing control to
3723# the user inexact handler.
3724#
3725# byte, word, and long destination format operations can pass
3726# through here. so can double and single precision.
3727# although packed opclass three operations can take inexact
3728# exceptions, they won't pass through here since they are caught
3729# first by the unsupported data format exception handler. that handler
3730# sends them directly to _real_inex() if necessary.
3731#
# finex_out: inexact exception taken by an fmove-out instruction. The
# source is tagged as a NORM, the exception byte of the saved FPSR is
# cleared, and fout() is called with the rounding precision/mode in d0 and
# a pointer to the source operand in a0. Leaves through finex_exit.
3732finex_out:
3733
3734	mov.b		&NORM,STAG(%a6)		# src is a NORM
3735
3736	clr.l		%d0
3737	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3738
3739	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3740
3741	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3742
3743	bsr.l		fout			# store the default result
3744
3745	bra.b		finex_exit
3746
3747#########################################################################
3748# XDEF ****************************************************************	#
3749#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3750#									#
3751#	This handler should be the first code executed upon taking	#
3752#	the FP DZ exception in an operating system.			#
3753#									#
3754# XREF ****************************************************************	#
3755#	_imem_read_long() - read instruction longword from memory	#
3756#	fix_skewed_ops() - adjust fsave operand				#
3757#	_real_dz() - "callout" exit point from FP DZ handler		#
3758#									#
3759# INPUT ***************************************************************	#
3760#	- The system stack contains the FP DZ exception stack.		#
3761#	- The fsave frame contains the source operand.			#
3762# 									#
3763# OUTPUT **************************************************************	#
3764#	- The system stack contains the FP DZ exception stack.		#
3765#	- The fsave frame contains the adjusted source operand.		#
3766#									#
3767# ALGORITHM ***********************************************************	#
3768#	In a system where the DZ exception is enabled, the goal is to	#
3769# get to the handler specified at _real_dz(). But, on the 060, when the	#
3770# exception is taken, the input operand in the fsave state frame may	#
3771# be incorrect for some cases and need to be adjusted. So, this package	#
3772# adjusts the operand using fix_skewed_ops() and then branches to	#
3773# _real_dz(). 								#
3774#									#
3775#########################################################################
3776
# _fpsp_dz: entry point for the FP Divide-by-Zero exception. Saves the FPU
# state frame and scratch registers, fetches the faulting instruction,
# lets fix_skewed_ops() adjust the source operand in the fsave frame, then
# restores everything and continues at _real_dz.
3777	global		_fpsp_dz
3778_fpsp_dz:
3779
3780	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3781
3782	fsave		FP_SRC(%a6)		# grab the "busy" frame
3783
3784 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3785	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3787
3788# the FPIAR holds the "current PC" of the faulting instruction
3789	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3790
3791	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3792	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3793	bsr.l		_imem_read_long		# fetch the instruction words
3794	mov.l		%d0,EXC_OPWORD(%a6)	# save opword and extension word
3795
3796##############################################################################
3797
3798
3799# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800# this would be the case for opclass two operations with a source zero
3801# in the sgl or dbl format.
3802	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3803	bsr.l		fix_skewed_ops		# fix src op
3804
# restore the saved state; the (possibly adjusted) fsave frame is reloaded.
3805fdz_exit:
3806	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3807	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3809
3810	frestore	FP_SRC(%a6)
3811
3812	unlk		%a6
3813	bra.l		_real_dz
3814
3815#########################################################################
3816# XDEF ****************************************************************	#
3817#	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
3818#									#
3819#	This handler should be the first code executed upon taking the	#
3820#	"Line F Emulator" exception in an operating system.		#
3821#									#
3822# XREF ****************************************************************	#
3823#	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
3824#	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
3825#	_real_fline() - handle "FLINE" exceptions			#
3826#	_imem_read_long() - read instruction longword			#
3827#									#
3828# INPUT ***************************************************************	#
3829#	- The system stack contains a "Line F Emulator" exception	#
3830#	  stack frame.							#
3831# 									#
3832# OUTPUT **************************************************************	#
3833#	- The system stack is unchanged					#
3834#									#
3835# ALGORITHM ***********************************************************	#
3836#	When a "Line F Emulator" exception occurs, there are 3 possible	#
3837# exception types, denoted by the exception stack frame format number:	#
3838#	(1) FPU unimplemented instruction (6 word stack frame)		#
3839#	(2) FPU disabled (8 word stack frame)				#
3840#	(3) Line F (4 word stack frame)					#
3841#									#
3842#	This module determines which and forks the flow off to the 	#
3843# appropriate "callout" (for "disabled" and "Line F") or to the		#
3844# correct emulation code (for "FPU unimplemented").			#
3845#	This code also must check for "fmovecr" instructions w/ a	#
3846# non-zero <ea> field. These may get flagged as "Line F" but should	#
3847# really be flagged as "FPU Unimplemented". (This is a "feature" on	#
3848# the '060.)								#
3849#									#
3850#########################################################################
3851
# _fpsp_fline: entry point for the "Line F Emulator" exception. The
# format/vector-offset word at 0x6(%sp) selects the case:
#   0x202c -> FP unimplemented instruction, handled by _fpsp_unimp
#   0x402c -> FPU disabled, handled by _real_fpu_disabled
#   other  -> F-Line illegal; an "fmovecr" with a non-zero <ea> field is
#             converted to one of the two cases above, anything else goes
#             to fline_fline.
3852	global		_fpsp_fline
3853_fpsp_fline:
3854
3855# check to see if this exception is a "FP Unimplemented Instruction"
3856# exception. if so, branch directly to that handler's entry point.
3857	cmpi.w		0x6(%sp),&0x202c	# format 0x2, vector offset 0x02c?
3858	beq.l		_fpsp_unimp
3859
3860# check to see if the FPU is disabled. if so, jump to the OS entry
3861# point for that condition.
3862	cmpi.w		0x6(%sp),&0x402c	# format 0x4, vector offset 0x02c?
3863	beq.l		_real_fpu_disabled
3864
3865# the exception was an "F-Line Illegal" exception. we check to see
3866# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3867# so, convert the F-Line exception stack frame to an FP Unimplemented
3868# Instruction exception stack frame else branch to the OS entry
3869# point for the F-Line exception handler.
3870	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3871
3872	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3873
3874	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)	# stacked PC = instruction addr
3875	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3876	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3877	bsr.l		_imem_read_long		# fetch instruction words
3878
# d0 = opword:extension word. an fmovecr has 0x03c8 in the upper 10 bits
# of the opword and 0x17 in the upper 6 bits of the extension word.
3879	bfextu		%d0{&0:&10},%d1		# is it an fmovecr?
3880	cmpi.w		%d1,&0x03c8
3881	bne.b		fline_fline		# no
3882
3883	bfextu		%d0{&16:&6},%d1		# is it an fmovecr?
3884	cmpi.b		%d1,&0x17
3885	bne.b		fline_fline		# no
3886
3887# it's an fmovecr w/ a non-zero <ea> that has entered through
3888# the F-Line Illegal exception.
3889# so, we need to convert the F-Line exception stack frame into an
3890# FP Unimplemented Instruction stack frame and jump to that entry
3891# point.
3892#
3893# but, if the FPU is disabled, then we need to jump to the FPU disabled
3894# entry point.
3895	movc		%pcr,%d0		# read processor configuration reg
3896	btst		&0x1,%d0		# is PCR bit 1 set?
3897	beq.b		fline_fmovcr		# no; take the unimplemented path
3898
3899	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3900
3901	unlk		%a6
3902
# grow the frame by 8 bytes and rewrite it as a format 0x4 frame:
# SR, "Next PC", 0x402c, then the "Current PC" in the longword at 0xc.
3903	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
3904	mov.w		0x8(%sp),(%sp)		# move SR
3905	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
3906	mov.w		&0x402c,0x6(%sp)	# stk fmt = 0x4; voff = 0x02c
3907	mov.l		0x2(%sp),0xc(%sp)	# copy "Current PC" to offset 0xc
3908	addq.l		&0x4,0x2(%sp)		# set "Next PC"
3909
3910	bra.l		_real_fpu_disabled
3911
# fline_fmovcr: an "fmovecr" with a non-zero <ea> arrived as an F-Line
# exception while the FPU is enabled. FPIAR is loaded with the stacked PC,
# the stacked PC is advanced by 4, and the frame is grown by one longword
# with its format byte set to 0x20 before entering _fpsp_unimp.
3912fline_fmovcr:
3913	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3914
3915	unlk		%a6
3916
3917	fmov.l		0x2(%sp),%fpiar		# set current PC
3918	addq.l		&0x4,0x2(%sp)		# set Next PC
3919
3920	mov.l		(%sp),-(%sp)		# push copy of SR and PC upper word
3921	mov.l		0x8(%sp),0x4(%sp)	# move PC lower word and fmt/voff down
3922	mov.b		&0x20,0x6(%sp)		# upper byte of fmt/voff word = 0x20
3923
3924	bra.l		_fpsp_unimp
3925
# fline_fline: not an fmovecr. Undo the local frame and hand the untouched
# exception frame to _real_fline.
3926fline_fline:
3927	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3928
3929	unlk		%a6
3930
3931	bra.l		_real_fline
3932
3933#########################################################################
3934# XDEF ****************************************************************	#
3935#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
3936#		       Instruction" exception.				#
3937#									#
3938#	This handler should be the first code executed upon taking the	#
3939#	FP Unimplemented Instruction exception in an operating system.	#
3940#									#
3941# XREF ****************************************************************	#
3942#	_imem_read_{word,long}() - read instruction word/longword	#
3943#	load_fop() - load src/dst ops from memory and/or FP regfile	#
3944#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3945#	tbl_trans - addr of table of emulation routines for trnscndls	#
3946#	_real_access() - "callout" for access error exception		#
3947#	_fpsp_done() - "callout" for exit; work all done		#
3948#	_real_trace() - "callout" for Trace enabled exception		#
3949#	smovcr() - emulate "fmovecr" instruction			#
3950#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
3951#	_ftrapcc() - emulate an "ftrapcc" instruction			#
3952#	_fdbcc() - emulate an "fdbcc" instruction			#
3953#	_fscc() - emulate an "fscc" instruction				#
3954#	_real_trap() - "callout" for Trap exception			#
3955# 	_real_bsun() - "callout" for enabled Bsun exception		#
3956#									#
3957# INPUT ***************************************************************	#
3958#	- The system stack contains the "Unimplemented Instr" stk frame	#
3959# 									#
3960# OUTPUT **************************************************************	#
3961#	If access error:						#
3962#	- The system stack is changed to an access error stack frame	#
3963#	If Trace exception enabled:					#
3964#	- The system stack is changed to a Trace exception stack frame	#
3965#	Else: (normal case)						#
3966#	- Correct result has been stored as appropriate			#
3967#									#
3968# ALGORITHM ***********************************************************	#
3969#	There are two main cases of instructions that may enter here to	#
3970# be emulated: (1) the FPgen instructions, most of which were also	#
3971# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
3972#	For the first set, this handler calls the routine load_fop()	#
3973# to load the source and destination (for dyadic) operands to be used	#
3974# for instruction emulation. The correct emulation routine is then 	#
3975# chosen by decoding the instruction type and indexing into an 		#
3976# emulation subroutine index table. After emulation returns, this 	#
3977# handler checks to see if an exception should occur as a result of the #
3978# FP instruction emulation. If so, then an FP exception of the correct	#
3979# type is inserted into the FPU state frame using the "frestore"	#
3980# instruction before exiting through _fpsp_done(). In either the 	#
3981# exceptional or non-exceptional cases, we must check to see if the	#
3982# Trace exception is enabled. If so, then we must create a Trace	#
3983# exception frame from the current exception frame and exit through	#
3984# _real_trace().							#
3985# 	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
3986# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
3987# may flag that a BSUN exception should be taken. If so, then the 	#
3988# current exception stack frame is converted into a BSUN exception 	#
3989# stack frame and an exit is made through _real_bsun(). If the		#
3990# instruction was "ftrapcc" and a Trap exception should result, a Trap	#
3991# exception stack frame is created from the current frame and an exit	#
3992# is made through _real_trap(). If a Trace exception is pending, then	#
3993# a Trace exception frame is created from the current frame and a jump	#
3994# is made to _real_trace(). Finally, if none of these conditions exist,	#
3995# then the handler exits through the callout _fpsp_done().		#
3996#									#
3997# 	In any of the above scenarios, if a _mem_read() or _mem_write()	#
3998# "callout" returns a failing value, then an access error stack frame	#
3999# is created from the current stack frame and an exit is made through	#
4000# _real_access().							#
4001#									#
4002#########################################################################
4003
4004#
4005# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
4006#
4007#	*****************
4008#	*		* => <ea> of fp unimp instr.
4009#	-      EA	-
4010#	*		*
4011#	*****************
4012#	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
4013#	*****************
4014#	*		*
4015#	-    Next PC	- => PC of instr to execute after exc handling
4016#	*		*
4017#	*****************
4018#	*      SR	* => SR at the time the exception was taken
4019#	*****************
4020#
4021# Note: the !NULL bit does not get set in the fsave frame when the
4022# machine encounters an fp unimp exception. Therefore, it must be set
4023# before leaving this handler.
4024#
# _fpsp_unimp: entry point for the FP Unimplemented Instruction exception.
# Saves scratch and FP registers, records the pre-exception stack pointer
# in EXC_A7 (the user sp, or the address just above the stacked <ea> for
# supervisor mode), fetches the faulting instruction through FPIAR and
# dispatches on bit 22 of the fetched longword: clear falls through to
# funimp_gen, set branches to funimp_misc.
4025	global		_fpsp_unimp
4026_fpsp_unimp:
4027
4028	link.w		%a6,&-LOCAL_SIZE	# init stack frame
4029
4030	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
4031	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4032	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1
4033
4034	btst		&0x5,EXC_SR(%a6)	# user mode exception?
4035	bne.b		funimp_s		# no; supervisor mode
4036
4037# save the value of the user stack pointer onto the stack frame
4038funimp_u:
4039	mov.l		%usp,%a0		# fetch user stack pointer
4040	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
4041	bra.b		funimp_cont
4042
4043# store the value of the supervisor stack pointer BEFORE the exc occurred.
4044# old_sp is address just above stacked effective address.
4045funimp_s:
4046	lea		4+EXC_EA(%a6),%a0	# load old a7'
4047	mov.l		%a0,EXC_A7(%a6)		# store a7'
4048	mov.l		%a0,OLD_A7(%a6)		# make a copy
4049
4050funimp_cont:
4051
4052# the FPIAR holds the "current PC" of the faulting instruction.
4053	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4054
4055	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4056	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
4057	bsr.l		_imem_read_long		# fetch the instruction words
4058	mov.l		%d0,EXC_OPWORD(%a6)	# save opword and extension word
4059
4060############################################################################
4061
4062	fmov.l		&0x0,%fpcr		# clear FPCR
4063	fmov.l		&0x0,%fpsr		# clear FPSR
4064
4065	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag
4066
4067# Divide the fp instructions into 8 types based on the TYPE field in
4068# bits 6-8 of the opword(classes 6,7 are undefined).
4069# (for the '060, only two types  can take this exception)
4070#	bftst		%d0{&7:&3}		# test TYPE
# bit 22 of d0 is bit 6 of the opword, i.e. the low bit of the TYPE field.
4071	btst		&22,%d0			# type 0 or 1 ?
4072	bne.w		funimp_misc		# type 1
4073
4074#########################################
4075# TYPE == 0: General instructions	#
4076#########################################
# funimp_gen: emulate a TYPE 0 (general) instruction. "fmovecr" is handled
# at funimp_fmovcr; everything else has its operands loaded by _load_fop()
# and is dispatched through tbl_trans. If any exception is enabled in the
# saved FPCR the result is post-processed at funimp_ena; otherwise the
# result is stored to the destination FP register and the saved state is
# restored.
4077funimp_gen:
4078
4079	clr.b		STORE_FLG(%a6)		# clear "store result" flag
4080
4081# clear the ccode byte and exception status byte
4082	andi.l		&0x00ff00ff,USER_FPSR(%a6)
4083
4084	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
4085	cmpi.b		%d1,&0x17		# is op an fmovecr?
4086	beq.w		funimp_fmovcr		# yes
4087
4088funimp_gen_op:
4089	bsr.l		_load_fop		# load
4090
4091	clr.l		%d0
4092	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
4093
# table index = (low 6 bits of the command word << 3) | source operand tag
4094	mov.b		1+EXC_CMDREG(%a6),%d1
4095	andi.w		&0x003f,%d1		# extract extension bits
4096	lsl.w		&0x3,%d1		# shift left 3 bits
4097	or.b		STAG(%a6),%d1		# insert src optag bits
4098
4099	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
4100	lea		FP_SRC(%a6),%a0		# pass src ptr in a0
4101
# tbl_trans holds one word per index; the value fetched is used as an
# offset from tbl_trans itself.
4102	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
4103	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate
4104
4105funimp_fsave:
4106	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
4107	bne.w		funimp_ena		# some are enabled
4108
4109funimp_store:
4110	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4111	bsr.l		store_fpreg		# store result to fp regfile
4112
4113funimp_gen_exit:
4114	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4115	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4116 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
4117
# funimp_gen_exit_cmp: final exit sequence for general instructions.
# If the addressing mode was (sp)+ or -(sp) the stack pointer fix-up at
# funimp_gen_exit_a7 runs first. Otherwise the local frame is removed and,
# when the trace bit is clear in the stacked SR, the handler leaves through
# _fpsp_done; when it is set, the frame is rewritten as a Trace frame and
# the handler leaves through _real_trace.
4118funimp_gen_exit_cmp:
4119	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4120	beq.b		funimp_gen_exit_a7	# yes
4121
4122	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4123	beq.b		funimp_gen_exit_a7	# yes
4124
4125funimp_gen_exit_cont:
4126	unlk		%a6
4127
# from here on (%sp) is the stacked SR.
4128funimp_gen_exit_cont2:
4129	btst		&0x7,(%sp)		# is trace on?
4130	beq.l		_fpsp_done		# no
4131
4132# this catches a problem with the case where an exception will be re-inserted
4133# into the machine. the frestore has already been executed...so, the fmov.l
4134# alone of the control register would trigger an unwanted exception.
4135# until I feel like fixing this, we'll sidestep the exception.
# NOTE(review): 0x14(%sp) is presumably the address slot of the exception
# frame lying beneath the temporary fsave frame - confirm the frame size.
4136	fsave		-(%sp)			# park the FPU state on the stack
4137	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
4138	frestore	(%sp)+			# put the FPU state back
4139	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
4140	bra.l		_real_trace
4141
# funimp_gen_exit_a7: the emulated instruction used (sp)+ or -(sp).
# For a user-mode exception the updated value in EXC_A7 is written to the
# user stack pointer and the normal exit continues. Supervisor mode is
# handled at funimp_gen_exit_a7_s.
4142funimp_gen_exit_a7:
4143	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
4144	bne.b		funimp_gen_exit_a7_s	# supervisor
4145
4146	mov.l		%a0,-(%sp)		# save a0
4147	mov.l		EXC_A7(%a6),%a0		# load updated user a7
4148	mov.l		%a0,%usp		# set new user stack pointer
4149	mov.l		(%sp)+,%a0		# restore a0
4150	bra.b		funimp_gen_exit_cont
4151
4152# if the instruction was executed from supervisor mode and the addressing
4153# mode was (a7)+, then the stack frame for the rte must be shifted "up"
4154# "n" bytes where "n" is the size of the src operand type.
4155# f<op>.{b,w,l,s,d,x,p}
#
# d0 = EXC_A7 - OLD_A7 is the byte distance to move. The longwords at
# 0x2+EXC_PC and EXC_SR are copied to the same offsets plus d0, and d0 is
# then left in the word at EXC_SR so that, after unlk, "add.w (%sp),%sp"
# moves sp onto the relocated frame.
4156funimp_gen_exit_a7_s:
4157	mov.l		%d0,-(%sp)		# save d0
4158	mov.l		EXC_A7(%a6),%d0		# load new a7'
4159	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
4160	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4161	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4162	mov.w		%d0,EXC_SR(%a6)		# store incr number
4163	mov.l		(%sp)+,%d0		# restore d0
4164
4165	unlk		%a6
4166
4167	add.w		(%sp),%sp		# stack frame shifted
4168	bra.b		funimp_gen_exit_cont2
4169
4170######################
4171# fmovecr.x #ccc,fpn #
4172######################
# funimp_fmovcr: emulate "fmovecr". Calls smovcr() with the rounding
# precision/mode byte in d0 and the low 7 bits of the command word (the
# constant ROM offset) in d1, then rejoins the general flow at funimp_fsave.
4173funimp_fmovcr:
4174	clr.l		%d0
4175	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode in d0
4176	mov.b		1+EXC_CMDREG(%a6),%d1
4177	andi.l		&0x0000007f,%d1		# pass rom offset in d1
4178	bsr.l		smovcr
4179	bra.w		funimp_fsave
4180
4181#########################################################################
4182
4183#
4184# the user has enabled some exceptions. we figure not to see this too
4185# often so that's why it gets lower priority.
4186#
# funimp_ena: reached from funimp_fsave with d0 = FPCR_ENABLE (non-zero).
# Determines whether the emulated instruction raised an exception that is
# also enabled. If so, control goes to funimp_exc with the bit-field offset
# of the highest priority one in d0. If not, the only remaining special
# case is an overflow with inexact enabled, which is forced to an overflow
# frame; everything else returns to funimp_store.
4187funimp_ena:
4188
4189# was an exception set that was also enabled?
4190	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
4191	bfffo		%d0{&24:&8},%d0		# find highest priority exception
4192	bne.b		funimp_exc		# at least one was set
4193
4194# no exception that was enabled was set BUT if we got an exact overflow
4195# and overflow wasn't enabled but inexact was (yech!) then this is
4196# an inexact exception; otherwise, return to normal non-exception flow.
4197	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4198	beq.w		funimp_store		# no; return to normal flow
4199
4200# the overflow w/ exact result happened but was inexact set in the FPCR?
4201funimp_ovfl:
4202	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4203	beq.w		funimp_store		# no; return to normal flow
4204	bra.b		funimp_exc_ovfl		# yes
4205
4206# some exception happened that was actually enabled.
4207# we'll insert this new exception into the FPU and then return.
# funimp_exc: an enabled exception was raised by the emulated instruction.
# On entry d0 holds the bfffo result for the low byte (24-31). The matching
# status word is written to the word at offset 2 of the fsave frame
# (2+FP_SRC) so that the frestore at funimp_gen_exit2 re-inserts the
# exception into the FPU.
4208funimp_exc:
4209	subi.l		&24,%d0			# fix offset to be 0-7
4210	cmpi.b		%d0,&0x6		# is exception INEX?
4211	bne.b		funimp_exc_force	# no
4212
4213# the enabled exception was inexact. so, if it occurs with an overflow
4214# or underflow that was disabled, then we have to force an overflow or
4215# underflow frame. the eventual overflow or underflow handler will see that
4216# it's actually an inexact and act appropriately. this is the only easy
4217# way to have the EXOP available for the enabled inexact handler when
4218# a disabled overflow or underflow has also happened.
4219	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4220	bne.b		funimp_exc_ovfl		# yes
4221	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4222	bne.b		funimp_exc_unfl		# yes
4223
4224# force the fsave exception status bits to signal an exception of the
4225# appropriate type. don't forget to "skew" the source operand in case we
4226# "unskewed" the one the hardware initially gave us.
4227funimp_exc_force:
4228	mov.l		%d0,-(%sp)		# save d0
4229	bsr.l		funimp_skew		# check for special case
4230	mov.l		(%sp)+,%d0		# restore d0
4231	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4232	bra.b		funimp_gen_exit2	# exit with frestore
4233
# status words indexed by the 0-7 offset computed at funimp_exc; index 6
# is the INEX case tested above.
4234tbl_funimp_except:
4235	short		0xe002, 0xe006, 0xe004, 0xe005
4236	short		0xe003, 0xe002, 0xe001, 0xe001
4237
4238# insert an overflow frame
4239funimp_exc_ovfl:
4240	bsr.l		funimp_skew		# check for special case
4241	mov.w		&0xe005,2+FP_SRC(%a6)
4242	bra.b		funimp_gen_exit2
4243
4244# insert an underflow frame
4245funimp_exc_unfl:
4246	bsr.l		funimp_skew		# check for special case
4247	mov.w		&0xe003,2+FP_SRC(%a6)
4248
4249# this is the general exit point for an enabled exception that will be
4250# restored into the machine for the instruction just emulated.
4251funimp_gen_exit2:
4252	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4253	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4254 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
4255
4256	frestore	FP_SRC(%a6)		# insert exceptional status
4257
4258	bra.w		funimp_gen_exit_cmp
4259
4260############################################################################
4261
4262#
4263# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4264#
4265# These instructions were implemented on the '881/2 and '040 in hardware but
4266# are emulated in software on the '060.
4267#
# funimp_misc: decode a TYPE 1 instruction from the opword held in the
# upper word of d0. The 3-bit field at bit-field offset 10 (the <ea> mode)
# selects fdb<cc> when it is 1 and fs<cc> when it is anything else but 7.
# For mode 7 the 3-bit field at offset 13 (the <ea> register) selects
# fs<cc> when it is below 2; all remaining encodings fall through to
# funimp_ftrapcc.
4268funimp_misc:
4269	bfextu		%d0{&10:&3},%d1		# extract mode field
4270	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
4271	beq.w		funimp_fdbcc		# yes
4272	cmpi.b		%d1,&0x7		# is it an fs<cc>?
4273	bne.w		funimp_fscc		# yes
4274	bfextu		%d0{&13:&3},%d1		# extract register field
4275	cmpi.b		%d1,&0x2		# is it an fs<cc>?
4276	blt.w		funimp_fscc		# yes
4277
4278#########################
4279# ftrap<cc>		#
4280# ftrap<cc>.w #<data>	#
4281# ftrap<cc>.l #<data>	#
4282#########################
# funimp_ftrapcc: emulate ftrap<cc> by calling _ftrapcc() and then acting
# on SPCOND_FLG: fbsun_flg goes to funimp_bsun, ftrapcc_flg converts the
# current frame into a Trap frame and leaves through _real_trap, and any
# other value goes to funimp_done.
4283funimp_ftrapcc:
4284
4285	bsr.l		_ftrapcc		# FTRAP<cc>()
4286
4287	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4288	beq.w		funimp_bsun		# yes
4289
4290	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4291	bne.w		funimp_done		# no
4292
4293#	 FP UNIMP FRAME		   TRAP  FRAME
4294#	*****************	*****************
4295#	**    <EA>     **	**  Current PC **
4296#	*****************	*****************
4297#	* 0x2 *  0x02c	*	* 0x2 *  0x01c  *
4298#	*****************	*****************
4299#	**   Next PC   **	**   Next PC   **
4300#	*****************	*****************
4301#	*      SR	*	*      SR	*
4302#	*****************	*****************
4303#	    (6 words)		    (6 words)
4304#
4305# the ftrapcc instruction should take a trap. so, here we must create a
4306# trap stack frame from an unimplemented fp instruction stack frame and
4307# jump to the user supplied entry point for the trap exception
4308funimp_ftrapcc_tp:
4309	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4310	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c
4311
4312	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4313	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4314 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
4315
4316	unlk		%a6
4317	bra.l		_real_trap
4318
4319#########################
4320# fdb<cc> Dn,<label>	#
4321#########################
funimp_fdbcc:

# the branch displacement is the next instruction word. fetch it through
# _imem_read_word (address passed in a0; the word comes back in d0 and a
# non-zero d1 reports a failed fetch) and step EXC_EXTWPTR past it.
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# read displacement

	tst.l		%d1			# did ifetch fail?
	bne.w		funimp_iacc		# yes; build an access error frame

	ext.l		%d0			# sign extend displacement

# d0 holds the sign-extended displacement when _fdbcc is entered. the
# outcome is read back from SPCOND_FLG(%a6).
	bsr.l		_fdbcc			# FDB<cc>()

	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

	bra.w		funimp_done		# branch to finish
4339
4340#################
4341# fs<cc>.b <ea>	#
4342#################
funimp_fscc:

# emulate the instruction. the outcome is read back from SPCOND_FLG(%a6).
	bsr.l		_fscc			# FS<cc>()

# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
# does not need to update "An" before taking a bsun exception.
	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

# no bsun. bit 5 of the high byte of the stacked SR separates the user and
# supervisor exits, because only the supervisor case can have had its
# exception frame touched by the instruction (see funimp_fscc_s).
	btst		&0x5,EXC_SR(%a6)	# is it a user mode exception?
	bne.b		funimp_fscc_s		# no; supervisor

# user mode: EXC_A7 is written back to the USP unconditionally.
# NOTE(review): presumably EXC_A7 already reflects any -(a7)/(a7)+ update
# made during emulation -- confirm in _fscc.
funimp_fscc_u:
	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
	mov.l		%a0,%usp
	bra.w		funimp_done		# branch to finish
4359
# supervisor mode fs<cc>.
#
# remember, I'm assuming that post-increment is bogus...(it IS!!!), so only
# the pre-decrement case is handled here.
# if a7 was modified (mda7_flg), the least significant WORD of the stacked
# effective address got overwritten by the "fs<cc> -(An)". We must shift the
# stack frame "down" so that the rte will work correctly without destroying
# the result. even though the operation size is byte, the stack ptr is decr
# by 2.
#
# remember, also, this instruction may be traced.
funimp_fscc_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
	bne.w		funimp_done		# no; ordinary exit

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1

	unlk		%a6

# (%sp) is now the high byte of the stacked SR.
	btst		&0x7,(%sp)		# is trace enabled?
	bne.b		funimp_fscc_s_trace	# yes

# not traced: open up one word below the frame and slide SR, PC and the
# format/vector word down by 2 bytes. nothing is stored at or above the
# overwritten word, so the fs<cc> result stays where it was written.
	subq.l		&0x2,%sp
	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
	bra.l		_fpsp_done

# traced: same 2-byte shift, but the moved frame is turned into a trace
# frame on the way: fmt/voff becomes $2024 and the address slot that now
# sits at 0x8(%sp) is loaded from fpiar. exit is through _real_trace.
funimp_fscc_s_trace:
	subq.l		&0x2,%sp
	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"

	bra.l		_real_trace
4393
4394#
4395# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4396# the fp unimplemented instruction exception stack frame into a bsun stack frame,
4397# restore a bsun exception into the machine, and branch to the user
4398# supplied bsun hook.
4399#
4400#	 FP UNIMP FRAME		   BSUN FRAME
4401#	*****************	*****************
4402#	**    <EA>     **	* 0x0 * 0x0c0	*
4403#	*****************	*****************
4404#	* 0x2 *  0x02c  *	** Current PC  **
4405#	*****************	*****************
4406#	**   Next PC   **	*      SR	*
4407#	*****************	*****************
4408#	*      SR	*	    (4 words)
4409#	*****************
4410#	    (6 words)
4411#
funimp_bsun:
# lay the 4-word bsun frame over the 4 highest-addressed words of the 6-word
# fp unimplemented frame. each store lands 2 words "up" from where the same
# item lives in the old frame; the 2 lowest words are left behind as sludge
# and dropped by the addq.l after the unlk.
	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"

# patch the state image at FP_SRC(%a6) that is frestore'd below.
# NOTE(review): the meaning of the 0xe000 pattern comes from the '060 fsave
# frame layout, which is not visible here -- confirm against the manual.
	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore bsun exception

	unlk		%a6

	addq.l		&0x4,%sp		# erase sludge

	bra.l		_real_bsun		# branch to user bsun hook
4430
4431#
4432# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4433# and return.
4434#
4435# as usual, we have to check for trace mode being on here. since instructions
4436# modifying the supervisor stack frame don't pass through here, this is a
4437# relatively easy task.
4438#
4439funimp_done:
4440	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
4441	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4442 	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
4443
4444	unlk		%a6
4445
4446	btst		&0x7,(%sp)		# is trace enabled?
4447	bne.b		funimp_trace		# yes
4448
4449	bra.l		_fpsp_done
4450
4451#	 FP UNIMP FRAME		  TRACE  FRAME
4452#	*****************	*****************
4453#	**    <EA>     **	**  Current PC **
4454#	*****************	*****************
4455#	* 0x2 *  0x02c	*	* 0x2 *  0x024  *
4456#	*****************	*****************
4457#	**   Next PC   **	**   Next PC   **
4458#	*****************	*****************
4459#	*      SR	*	*      SR	*
4460#	*****************	*****************
4461#	    (6 words)		    (6 words)
4462#
# the emulated instruction (reached from funimp_done, so any of ftrapcc,
# fscc or fdbcc) should take a trace trap. so, here we must create a
# trace stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trace exception.
#
# a6 has already been unlinked, so the frame is addressed off sp. both frames
# are 6 words; only the address slot and the low byte of the format/vector
# word need to change.
funimp_trace:
	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024

	bra.l		_real_trace
4471
4472################################################################
4473
# tbl_trans: offset table for the emulated instructions.
#
# each entry is a 16-bit offset, relative to tbl_trans, of the routine that
# handles one (instruction, source operand type) pair. the entries come in
# groups of eight per instruction ($00-$37, as annotated on each line), one
# slot per operand type, giving 0x38 * 8 = 0x1c0 entries -- the count given
# to swbeg. an entry of "tbl_trans - tbl_trans" (zero) marks a combination
# that has no routine in this table.
#
# NOTE(review): within most groups the annotations run -3, -5, -4, -6 while
# the slots themselves are in the order qnan, denorm, snan, unnorm, so the
# "-5"/"-4" numbers look out of step with their positions. confirm against
# the operand tag definitions before relying on the numbers in the comments.
	global		tbl_trans
	swbeg		&0x1c0
tbl_trans:
4477	short 		tbl_trans - tbl_trans	# $00-0 fmovecr all
4478	short 		tbl_trans - tbl_trans	# $00-1 fmovecr all
4479	short 		tbl_trans - tbl_trans	# $00-2 fmovecr all
4480	short 		tbl_trans - tbl_trans	# $00-3 fmovecr all
4481	short 		tbl_trans - tbl_trans	# $00-4 fmovecr all
4482	short 		tbl_trans - tbl_trans	# $00-5 fmovecr all
4483	short 		tbl_trans - tbl_trans	# $00-6 fmovecr all
4484	short 		tbl_trans - tbl_trans	# $00-7 fmovecr all
4485
4486	short 		tbl_trans - tbl_trans	# $01-0 fint norm
4487	short		tbl_trans - tbl_trans	# $01-1 fint zero
4488	short		tbl_trans - tbl_trans	# $01-2 fint inf
4489	short		tbl_trans - tbl_trans	# $01-3 fint qnan
4490	short		tbl_trans - tbl_trans	# $01-5 fint denorm
4491	short		tbl_trans - tbl_trans	# $01-4 fint snan
4492	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
4493	short		tbl_trans - tbl_trans	# $01-7 ERROR
4494
4495	short		ssinh	 - tbl_trans	# $02-0 fsinh norm
4496	short		src_zero - tbl_trans	# $02-1 fsinh zero
4497	short		src_inf	 - tbl_trans	# $02-2 fsinh inf
4498	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
4499	short		ssinhd	 - tbl_trans	# $02-5 fsinh denorm
4500	short		src_snan - tbl_trans	# $02-4 fsinh snan
4501	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
4502	short		tbl_trans - tbl_trans	# $02-7 ERROR
4503
4504	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
4505	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
4506	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
4507	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
4508	short		tbl_trans - tbl_trans	# $03-5 fintrz denorm
4509	short		tbl_trans - tbl_trans	# $03-4 fintrz snan
4510	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
4511	short		tbl_trans - tbl_trans	# $03-7 ERROR
4512
4513	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
4514	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
4515	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
4516	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
4517	short		tbl_trans - tbl_trans	# $04-5 fsqrt denorm
4518	short		tbl_trans - tbl_trans	# $04-4 fsqrt snan
4519	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
4520	short		tbl_trans - tbl_trans	# $04-7 ERROR
4521
4522	short		tbl_trans - tbl_trans	# $05-0 ERROR
4523	short		tbl_trans - tbl_trans	# $05-1 ERROR
4524	short		tbl_trans - tbl_trans	# $05-2 ERROR
4525	short		tbl_trans - tbl_trans	# $05-3 ERROR
4526	short		tbl_trans - tbl_trans	# $05-4 ERROR
4527	short		tbl_trans - tbl_trans	# $05-5 ERROR
4528	short		tbl_trans - tbl_trans	# $05-6 ERROR
4529	short		tbl_trans - tbl_trans	# $05-7 ERROR
4530
4531	short		slognp1	 - tbl_trans	# $06-0 flognp1 norm
4532	short		src_zero - tbl_trans	# $06-1 flognp1 zero
4533	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
4534	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
4535	short		slognp1d - tbl_trans	# $06-5 flognp1 denorm
4536	short		src_snan - tbl_trans	# $06-4 flognp1 snan
4537	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
4538	short		tbl_trans - tbl_trans	# $06-7 ERROR
4539
4540	short		tbl_trans - tbl_trans	# $07-0 ERROR
4541	short		tbl_trans - tbl_trans	# $07-1 ERROR
4542	short		tbl_trans - tbl_trans	# $07-2 ERROR
4543	short		tbl_trans - tbl_trans	# $07-3 ERROR
4544	short		tbl_trans - tbl_trans	# $07-4 ERROR
4545	short		tbl_trans - tbl_trans	# $07-5 ERROR
4546	short		tbl_trans - tbl_trans	# $07-6 ERROR
4547	short		tbl_trans - tbl_trans	# $07-7 ERROR
4548
4549	short		setoxm1	 - tbl_trans	# $08-0 fetoxm1 norm
4550	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
4551	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
4552	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
4553	short		setoxm1d - tbl_trans	# $08-5 fetoxm1 denorm
4554	short		src_snan - tbl_trans	# $08-4 fetoxm1 snan
4555	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
4556	short		tbl_trans - tbl_trans	# $08-7 ERROR
4557
4558	short		stanh	 - tbl_trans	# $09-0 ftanh norm
4559	short		src_zero - tbl_trans	# $09-1 ftanh zero
4560	short		src_one	 - tbl_trans	# $09-2 ftanh inf
4561	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
4562	short		stanhd	 - tbl_trans	# $09-5 ftanh denorm
4563	short		src_snan - tbl_trans	# $09-4 ftanh snan
4564	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
4565	short		tbl_trans - tbl_trans	# $09-7 ERROR
4566
4567	short		satan	 - tbl_trans	# $0a-0 fatan norm
4568	short		src_zero - tbl_trans	# $0a-1 fatan zero
4569	short		spi_2	 - tbl_trans	# $0a-2 fatan inf
4570	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
4571	short		satand	 - tbl_trans	# $0a-5 fatan denorm
4572	short		src_snan - tbl_trans	# $0a-4 fatan snan
4573	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
4574	short		tbl_trans - tbl_trans	# $0a-7 ERROR
4575
4576	short		tbl_trans - tbl_trans	# $0b-0 ERROR
4577	short		tbl_trans - tbl_trans	# $0b-1 ERROR
4578	short		tbl_trans - tbl_trans	# $0b-2 ERROR
4579	short		tbl_trans - tbl_trans	# $0b-3 ERROR
4580	short		tbl_trans - tbl_trans	# $0b-4 ERROR
4581	short		tbl_trans - tbl_trans	# $0b-5 ERROR
4582	short		tbl_trans - tbl_trans	# $0b-6 ERROR
4583	short		tbl_trans - tbl_trans	# $0b-7 ERROR
4584
4585	short		sasin	 - tbl_trans	# $0c-0 fasin norm
4586	short		src_zero - tbl_trans	# $0c-1 fasin zero
4587	short		t_operr	 - tbl_trans	# $0c-2 fasin inf
4588	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
4589	short		sasind	 - tbl_trans	# $0c-5 fasin denorm
4590	short		src_snan - tbl_trans	# $0c-4 fasin snan
4591	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
4592	short		tbl_trans - tbl_trans	# $0c-7 ERROR
4593
4594	short		satanh	 - tbl_trans	# $0d-0 fatanh norm
4595	short		src_zero - tbl_trans	# $0d-1 fatanh zero
4596	short		t_operr	 - tbl_trans	# $0d-2 fatanh inf
4597	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
4598	short		satanhd	 - tbl_trans	# $0d-5 fatanh denorm
4599	short		src_snan - tbl_trans	# $0d-4 fatanh snan
4600	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
4601	short		tbl_trans - tbl_trans	# $0d-7 ERROR
4602
4603	short		ssin	 - tbl_trans	# $0e-0 fsin norm
4604	short		src_zero - tbl_trans	# $0e-1 fsin zero
4605	short		t_operr	 - tbl_trans	# $0e-2 fsin inf
4606	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
4607	short		ssind	 - tbl_trans	# $0e-5 fsin denorm
4608	short		src_snan - tbl_trans	# $0e-4 fsin snan
4609	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
4610	short		tbl_trans - tbl_trans	# $0e-7 ERROR
4611
4612	short		stan	 - tbl_trans	# $0f-0 ftan norm
4613	short		src_zero - tbl_trans	# $0f-1 ftan zero
4614	short		t_operr	 - tbl_trans	# $0f-2 ftan inf
4615	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
4616	short		stand	 - tbl_trans	# $0f-5 ftan denorm
4617	short		src_snan - tbl_trans	# $0f-4 ftan snan
4618	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
4619	short		tbl_trans - tbl_trans	# $0f-7 ERROR
4620
4621	short		setox	 - tbl_trans	# $10-0 fetox norm
4622	short		ld_pone	 - tbl_trans	# $10-1 fetox zero
4623	short		szr_inf	 - tbl_trans	# $10-2 fetox inf
4624	short		src_qnan - tbl_trans	# $10-3 fetox qnan
4625	short		setoxd	 - tbl_trans	# $10-5 fetox denorm
4626	short		src_snan - tbl_trans	# $10-4 fetox snan
4627	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
4628	short		tbl_trans - tbl_trans	# $10-7 ERROR
4629
4630	short		stwotox	 - tbl_trans	# $11-0 ftwotox norm
4631	short		ld_pone	 - tbl_trans	# $11-1 ftwotox zero
4632	short		szr_inf	 - tbl_trans	# $11-2 ftwotox inf
4633	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
4634	short		stwotoxd - tbl_trans	# $11-5 ftwotox denorm
4635	short		src_snan - tbl_trans	# $11-4 ftwotox snan
4636	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
4637	short		tbl_trans - tbl_trans	# $11-7 ERROR
4638
4639	short		stentox	 - tbl_trans	# $12-0 ftentox norm
4640	short		ld_pone	 - tbl_trans	# $12-1 ftentox zero
4641	short		szr_inf	 - tbl_trans	# $12-2 ftentox inf
4642	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
4643	short		stentoxd - tbl_trans	# $12-5 ftentox denorm
4644	short		src_snan - tbl_trans	# $12-4 ftentox snan
4645	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
4646	short		tbl_trans - tbl_trans	# $12-7 ERROR
4647
4648	short		tbl_trans - tbl_trans	# $13-0 ERROR
4649	short		tbl_trans - tbl_trans	# $13-1 ERROR
4650	short		tbl_trans - tbl_trans	# $13-2 ERROR
4651	short		tbl_trans - tbl_trans	# $13-3 ERROR
4652	short		tbl_trans - tbl_trans	# $13-4 ERROR
4653	short		tbl_trans - tbl_trans	# $13-5 ERROR
4654	short		tbl_trans - tbl_trans	# $13-6 ERROR
4655	short		tbl_trans - tbl_trans	# $13-7 ERROR
4656
4657	short		slogn	 - tbl_trans	# $14-0 flogn norm
4658	short		t_dz2	 - tbl_trans	# $14-1 flogn zero
4659	short		sopr_inf - tbl_trans	# $14-2 flogn inf
4660	short		src_qnan - tbl_trans	# $14-3 flogn qnan
4661	short		slognd	 - tbl_trans	# $14-5 flogn denorm
4662	short		src_snan - tbl_trans	# $14-4 flogn snan
4663	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
4664	short		tbl_trans - tbl_trans	# $14-7 ERROR
4665
4666	short		slog10	 - tbl_trans	# $15-0 flog10 norm
4667	short		t_dz2	 - tbl_trans	# $15-1 flog10 zero
4668	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
4669	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
4670	short		slog10d	 - tbl_trans	# $15-5 flog10 denorm
4671	short		src_snan - tbl_trans	# $15-4 flog10 snan
4672	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
4673	short		tbl_trans - tbl_trans	# $15-7 ERROR
4674
4675	short		slog2	 - tbl_trans	# $16-0 flog2 norm
4676	short		t_dz2	 - tbl_trans	# $16-1 flog2 zero
4677	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
4678	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
4679	short		slog2d	 - tbl_trans	# $16-5 flog2 denorm
4680	short		src_snan - tbl_trans	# $16-4 flog2 snan
4681	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
4682	short		tbl_trans - tbl_trans	# $16-7 ERROR
4683
4684	short		tbl_trans - tbl_trans	# $17-0 ERROR
4685	short		tbl_trans - tbl_trans	# $17-1 ERROR
4686	short		tbl_trans - tbl_trans	# $17-2 ERROR
4687	short		tbl_trans - tbl_trans	# $17-3 ERROR
4688	short		tbl_trans - tbl_trans	# $17-4 ERROR
4689	short		tbl_trans - tbl_trans	# $17-5 ERROR
4690	short		tbl_trans - tbl_trans	# $17-6 ERROR
4691	short		tbl_trans - tbl_trans	# $17-7 ERROR
4692
4693	short		tbl_trans - tbl_trans	# $18-0 fabs norm
4694	short		tbl_trans - tbl_trans	# $18-1 fabs zero
4695	short		tbl_trans - tbl_trans	# $18-2 fabs inf
4696	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
4697	short		tbl_trans - tbl_trans	# $18-5 fabs denorm
4698	short		tbl_trans - tbl_trans	# $18-4 fabs snan
4699	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
4700	short		tbl_trans - tbl_trans	# $18-7 ERROR
4701
4702	short		scosh	 - tbl_trans	# $19-0 fcosh norm
4703	short		ld_pone	 - tbl_trans	# $19-1 fcosh zero
4704	short		ld_pinf	 - tbl_trans	# $19-2 fcosh inf
4705	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
4706	short		scoshd	 - tbl_trans	# $19-5 fcosh denorm
4707	short		src_snan - tbl_trans	# $19-4 fcosh snan
4708	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
4709	short		tbl_trans - tbl_trans	# $19-7 ERROR
4710
4711	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
4712	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
4713	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
4714	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
4715	short		tbl_trans - tbl_trans	# $1a-5 fneg denorm
4716	short		tbl_trans - tbl_trans	# $1a-4 fneg snan
4717	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
4718	short		tbl_trans - tbl_trans	# $1a-7 ERROR
4719
4720	short		tbl_trans - tbl_trans	# $1b-0 ERROR
4721	short		tbl_trans - tbl_trans	# $1b-1 ERROR
4722	short		tbl_trans - tbl_trans	# $1b-2 ERROR
4723	short		tbl_trans - tbl_trans	# $1b-3 ERROR
4724	short		tbl_trans - tbl_trans	# $1b-4 ERROR
4725	short		tbl_trans - tbl_trans	# $1b-5 ERROR
4726	short		tbl_trans - tbl_trans	# $1b-6 ERROR
4727	short		tbl_trans - tbl_trans	# $1b-7 ERROR
4728
4729	short		sacos	 - tbl_trans	# $1c-0 facos norm
4730	short		ld_ppi2	 - tbl_trans	# $1c-1 facos zero
4731	short		t_operr	 - tbl_trans	# $1c-2 facos inf
4732	short		src_qnan - tbl_trans	# $1c-3 facos qnan
4733	short		sacosd	 - tbl_trans	# $1c-5 facos denorm
4734	short		src_snan - tbl_trans	# $1c-4 facos snan
4735	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
4736	short		tbl_trans - tbl_trans	# $1c-7 ERROR
4737
4738	short		scos	 - tbl_trans	# $1d-0 fcos norm
4739	short		ld_pone	 - tbl_trans	# $1d-1 fcos zero
4740	short		t_operr	 - tbl_trans	# $1d-2 fcos inf
4741	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
4742	short		scosd	 - tbl_trans	# $1d-5 fcos denorm
4743	short		src_snan - tbl_trans	# $1d-4 fcos snan
4744	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
4745	short		tbl_trans - tbl_trans	# $1d-7 ERROR
4746
4747	short		sgetexp	 - tbl_trans	# $1e-0 fgetexp norm
4748	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
4749	short		t_operr	 - tbl_trans	# $1e-2 fgetexp inf
4750	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
4751	short		sgetexpd - tbl_trans	# $1e-5 fgetexp denorm
4752	short		src_snan - tbl_trans	# $1e-4 fgetexp snan
4753	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
4754	short		tbl_trans - tbl_trans	# $1e-7 ERROR
4755
4756	short		sgetman	 - tbl_trans	# $1f-0 fgetman norm
4757	short		src_zero - tbl_trans	# $1f-1 fgetman zero
4758	short		t_operr	 - tbl_trans	# $1f-2 fgetman inf
4759	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
4760	short		sgetmand - tbl_trans	# $1f-5 fgetman denorm
4761	short		src_snan - tbl_trans	# $1f-4 fgetman snan
4762	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
4763	short		tbl_trans - tbl_trans	# $1f-7 ERROR
4764
4765	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
4766	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
4767	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
4768	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
4769	short		tbl_trans - tbl_trans	# $20-5 fdiv denorm
4770	short		tbl_trans - tbl_trans	# $20-4 fdiv snan
4771	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
4772	short		tbl_trans - tbl_trans	# $20-7 ERROR
4773
4774	short		smod_snorm - tbl_trans	# $21-0 fmod norm
4775	short		smod_szero - tbl_trans	# $21-1 fmod zero
4776	short		smod_sinf - tbl_trans	# $21-2 fmod inf
4777	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
4778	short		smod_sdnrm - tbl_trans	# $21-5 fmod denorm
4779	short		sop_ssnan - tbl_trans	# $21-4 fmod snan
4780	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
4781	short		tbl_trans - tbl_trans	# $21-7 ERROR
4782
4783	short		tbl_trans - tbl_trans	# $22-0 fadd norm
4784	short		tbl_trans - tbl_trans	# $22-1 fadd zero
4785	short		tbl_trans - tbl_trans	# $22-2 fadd inf
4786	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
4787	short		tbl_trans - tbl_trans	# $22-5 fadd denorm
4788	short		tbl_trans - tbl_trans	# $22-4 fadd snan
4789	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
4790	short		tbl_trans - tbl_trans	# $22-7 ERROR
4791
4792	short		tbl_trans - tbl_trans	# $23-0 fmul norm
4793	short		tbl_trans - tbl_trans	# $23-1 fmul zero
4794	short		tbl_trans - tbl_trans	# $23-2 fmul inf
4795	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
4796	short		tbl_trans - tbl_trans	# $23-5 fmul denorm
4797	short		tbl_trans - tbl_trans	# $23-4 fmul snan
4798	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
4799	short		tbl_trans - tbl_trans	# $23-7 ERROR
4800
4801	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
4802	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
4803	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
4804	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
4805	short		tbl_trans - tbl_trans	# $24-5 fsgldiv denorm
4806	short		tbl_trans - tbl_trans	# $24-4 fsgldiv snan
4807	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
4808	short		tbl_trans - tbl_trans	# $24-7 ERROR
4809
4810	short		srem_snorm - tbl_trans	# $25-0 frem norm
4811	short		srem_szero - tbl_trans	# $25-1 frem zero
4812	short		srem_sinf - tbl_trans	# $25-2 frem inf
4813	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
4814	short		srem_sdnrm - tbl_trans	# $25-5 frem denorm
4815	short		sop_ssnan - tbl_trans	# $25-4 frem snan
4816	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
4817	short		tbl_trans - tbl_trans	# $25-7 ERROR
4818
4819	short		sscale_snorm - tbl_trans # $26-0 fscale norm
4820	short		sscale_szero - tbl_trans # $26-1 fscale zero
4821	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
4822	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
4823	short		sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4824	short		sop_ssnan - tbl_trans	# $26-4 fscale snan
4825	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
4826	short		tbl_trans - tbl_trans	# $26-7 ERROR
4827
4828	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
4829	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
4830	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
4831	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
4832	short		tbl_trans - tbl_trans	# $27-5 fsglmul denorm
4833	short		tbl_trans - tbl_trans	# $27-4 fsglmul snan
4834	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
4835	short		tbl_trans - tbl_trans	# $27-7 ERROR
4836
4837	short		tbl_trans - tbl_trans	# $28-0 fsub norm
4838	short		tbl_trans - tbl_trans	# $28-1 fsub zero
4839	short		tbl_trans - tbl_trans	# $28-2 fsub inf
4840	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
4841	short		tbl_trans - tbl_trans	# $28-5 fsub denorm
4842	short		tbl_trans - tbl_trans	# $28-4 fsub snan
4843	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
4844	short		tbl_trans - tbl_trans	# $28-7 ERROR
4845
4846	short		tbl_trans - tbl_trans	# $29-0 ERROR
4847	short		tbl_trans - tbl_trans	# $29-1 ERROR
4848	short		tbl_trans - tbl_trans	# $29-2 ERROR
4849	short		tbl_trans - tbl_trans	# $29-3 ERROR
4850	short		tbl_trans - tbl_trans	# $29-4 ERROR
4851	short		tbl_trans - tbl_trans	# $29-5 ERROR
4852	short		tbl_trans - tbl_trans	# $29-6 ERROR
4853	short		tbl_trans - tbl_trans	# $29-7 ERROR
4854
4855	short		tbl_trans - tbl_trans	# $2a-0 ERROR
4856	short		tbl_trans - tbl_trans	# $2a-1 ERROR
4857	short		tbl_trans - tbl_trans	# $2a-2 ERROR
4858	short		tbl_trans - tbl_trans	# $2a-3 ERROR
4859	short		tbl_trans - tbl_trans	# $2a-4 ERROR
4860	short		tbl_trans - tbl_trans	# $2a-5 ERROR
4861	short		tbl_trans - tbl_trans	# $2a-6 ERROR
4862	short		tbl_trans - tbl_trans	# $2a-7 ERROR
4863
4864	short		tbl_trans - tbl_trans	# $2b-0 ERROR
4865	short		tbl_trans - tbl_trans	# $2b-1 ERROR
4866	short		tbl_trans - tbl_trans	# $2b-2 ERROR
4867	short		tbl_trans - tbl_trans	# $2b-3 ERROR
4868	short		tbl_trans - tbl_trans	# $2b-4 ERROR
4869	short		tbl_trans - tbl_trans	# $2b-5 ERROR
4870	short		tbl_trans - tbl_trans	# $2b-6 ERROR
4871	short		tbl_trans - tbl_trans	# $2b-7 ERROR
4872
4873	short		tbl_trans - tbl_trans	# $2c-0 ERROR
4874	short		tbl_trans - tbl_trans	# $2c-1 ERROR
4875	short		tbl_trans - tbl_trans	# $2c-2 ERROR
4876	short		tbl_trans - tbl_trans	# $2c-3 ERROR
4877	short		tbl_trans - tbl_trans	# $2c-4 ERROR
4878	short		tbl_trans - tbl_trans	# $2c-5 ERROR
4879	short		tbl_trans - tbl_trans	# $2c-6 ERROR
4880	short		tbl_trans - tbl_trans	# $2c-7 ERROR
4881
4882	short		tbl_trans - tbl_trans	# $2d-0 ERROR
4883	short		tbl_trans - tbl_trans	# $2d-1 ERROR
4884	short		tbl_trans - tbl_trans	# $2d-2 ERROR
4885	short		tbl_trans - tbl_trans	# $2d-3 ERROR
4886	short		tbl_trans - tbl_trans	# $2d-4 ERROR
4887	short		tbl_trans - tbl_trans	# $2d-5 ERROR
4888	short		tbl_trans - tbl_trans	# $2d-6 ERROR
4889	short		tbl_trans - tbl_trans	# $2d-7 ERROR
4890
4891	short		tbl_trans - tbl_trans	# $2e-0 ERROR
4892	short		tbl_trans - tbl_trans	# $2e-1 ERROR
4893	short		tbl_trans - tbl_trans	# $2e-2 ERROR
4894	short		tbl_trans - tbl_trans	# $2e-3 ERROR
4895	short		tbl_trans - tbl_trans	# $2e-4 ERROR
4896	short		tbl_trans - tbl_trans	# $2e-5 ERROR
4897	short		tbl_trans - tbl_trans	# $2e-6 ERROR
4898	short		tbl_trans - tbl_trans	# $2e-7 ERROR
4899
4900	short		tbl_trans - tbl_trans	# $2f-0 ERROR
4901	short		tbl_trans - tbl_trans	# $2f-1 ERROR
4902	short		tbl_trans - tbl_trans	# $2f-2 ERROR
4903	short		tbl_trans - tbl_trans	# $2f-3 ERROR
4904	short		tbl_trans - tbl_trans	# $2f-4 ERROR
4905	short		tbl_trans - tbl_trans	# $2f-5 ERROR
4906	short		tbl_trans - tbl_trans	# $2f-6 ERROR
4907	short		tbl_trans - tbl_trans	# $2f-7 ERROR
4908
4909	short		ssincos	 - tbl_trans	# $30-0 fsincos norm
4910	short		ssincosz - tbl_trans	# $30-1 fsincos zero
4911	short		ssincosi - tbl_trans	# $30-2 fsincos inf
4912	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
4913	short		ssincosd - tbl_trans	# $30-5 fsincos denorm
4914	short		ssincossnan - tbl_trans	# $30-4 fsincos snan
4915	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
4916	short		tbl_trans - tbl_trans	# $30-7 ERROR
4917
4918	short		ssincos	 - tbl_trans	# $31-0 fsincos norm
4919	short		ssincosz - tbl_trans	# $31-1 fsincos zero
4920	short		ssincosi - tbl_trans	# $31-2 fsincos inf
4921	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
4922	short		ssincosd - tbl_trans	# $31-5 fsincos denorm
4923	short		ssincossnan - tbl_trans	# $31-4 fsincos snan
4924	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
4925	short		tbl_trans - tbl_trans	# $31-7 ERROR
4926
4927	short		ssincos	 - tbl_trans	# $32-0 fsincos norm
4928	short		ssincosz - tbl_trans	# $32-1 fsincos zero
4929	short		ssincosi - tbl_trans	# $32-2 fsincos inf
4930	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
4931	short		ssincosd - tbl_trans	# $32-5 fsincos denorm
4932	short		ssincossnan - tbl_trans	# $32-4 fsincos snan
4933	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
4934	short		tbl_trans - tbl_trans	# $32-7 ERROR
4935
4936	short		ssincos	 - tbl_trans	# $33-0 fsincos norm
4937	short		ssincosz - tbl_trans	# $33-1 fsincos zero
4938	short		ssincosi - tbl_trans	# $33-2 fsincos inf
4939	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
4940	short		ssincosd - tbl_trans	# $33-5 fsincos denorm
4941	short		ssincossnan - tbl_trans	# $33-4 fsincos snan
4942	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
4943	short		tbl_trans - tbl_trans	# $33-7 ERROR
4944
4945	short		ssincos	 - tbl_trans	# $34-0 fsincos norm
4946	short		ssincosz - tbl_trans	# $34-1 fsincos zero
4947	short		ssincosi - tbl_trans	# $34-2 fsincos inf
4948	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
4949	short		ssincosd - tbl_trans	# $34-5 fsincos denorm
4950	short		ssincossnan - tbl_trans	# $34-4 fsincos snan
4951	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
4952	short		tbl_trans - tbl_trans	# $34-7 ERROR
4953
4954	short		ssincos	 - tbl_trans	# $35-0 fsincos norm
4955	short		ssincosz - tbl_trans	# $35-1 fsincos zero
4956	short		ssincosi - tbl_trans	# $35-2 fsincos inf
4957	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
4958	short		ssincosd - tbl_trans	# $35-5 fsincos denorm
4959	short		ssincossnan - tbl_trans	# $35-4 fsincos snan
4960	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
4961	short		tbl_trans - tbl_trans	# $35-7 ERROR
4962
4963	short		ssincos	 - tbl_trans	# $36-0 fsincos norm
4964	short		ssincosz - tbl_trans	# $36-1 fsincos zero
4965	short		ssincosi - tbl_trans	# $36-2 fsincos inf
4966	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
4967	short		ssincosd - tbl_trans	# $36-5 fsincos denorm
4968	short		ssincossnan - tbl_trans	# $36-4 fsincos snan
4969	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
4970	short		tbl_trans - tbl_trans	# $36-7 ERROR
4971
4972	short		ssincos	 - tbl_trans	# $37-0 fsincos norm
4973	short		ssincosz - tbl_trans	# $37-1 fsincos zero
4974	short		ssincosi - tbl_trans	# $37-2 fsincos inf
4975	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
4976	short		ssincosd - tbl_trans	# $37-5 fsincos denorm
4977	short		ssincossnan - tbl_trans	# $37-4 fsincos snan
4978	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
4979	short		tbl_trans - tbl_trans	# $37-7 ERROR
4980
4981##########
4982
# the instruction fetch access for the displacement word for the
# fdbcc emulation failed. here, we create an access error frame
# from the current frame and branch to _real_access().
#
# the access error frame built here is 2 words longer than the 6-word fp
# unimplemented frame (a 4-byte FSLW is added), so after the a6 frame is
# unlinked the frame is grown downward by 4 bytes and its fields re-laid.
# NOTE(review): fp0-fp1 are reloaded via EXC_FPREGS here while the other
# exits in this file section use EXC_FP0; presumably both name the same
# slot -- confirm in the frame offset definitions.
funimp_iacc:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1

# the stacked PC is replaced by the address of the fdbcc itself; it is
# reused below as the faulting address as well.
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	unlk		%a6

# the first move both allocates the extra 4 bytes and copies SR,hi(PC) into
# them; the remaining stores fill in the rest of the longer frame.
	mov.l		(%sp),-(%sp)		# store SR,hi(PC)
	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff (fmt 0x4, offset 0x008)
	mov.l		0x2(%sp),0x8(%sp)	# store EA (copy of the PC just laid)
	mov.l		&0x09428001,0xc(%sp)	# store FSLW

# bit 5 of the high byte of the stacked SR tells user from supervisor; for
# supervisor, one more bit is set in the FSLW just stored.
	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		funimp_iacc_end		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

funimp_iacc_end:
	bra.l		_real_access
5007
5008#########################################################################
5009# ssin():     computes the sine of a normalized input			#
5010# ssind():    computes the sine of a denormalized input			#
5011# scos():     computes the cosine of a normalized input			#
5012# scosd():    computes the cosine of a denormalized input		#
5013# ssincos():  computes the sine and cosine of a normalized input	#
5014# ssincosd(): computes the sine and cosine of a denormalized input	#
5015#									#
5016# INPUT *************************************************************** #
5017#	a0 = pointer to extended precision input			#
5018#	d0 = round precision,mode					#
5019#									#
5020# OUTPUT ************************************************************** #
5021#	fp0 = sin(X) or cos(X) 						#
5022#									#
5023#    For ssincos(X):							#
5024#	fp0 = sin(X)							#
5025#	fp1 = cos(X)							#
5026#									#
5027# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 1 ulp in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
5032#									#
5033# ALGORITHM ***********************************************************	#
5034#									#
5035#	SIN and COS:							#
5036#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
5037#									#
5038#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
5039#									#
5040#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5041#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5042#		Overwrite k by k := k + AdjN.				#
5043#									#
5044#	4. If k is even, go to 6.					#
5045#									#
5046#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. 		#
5047#		Return sgn*cos(r) where cos(r) is approximated by an 	#
5048#		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
5049#		s = r*r.						#
5050#		Exit.							#
5051#									#
5052#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
5053#		where sin(r) is approximated by an odd polynomial in r	#
5054#		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.	#
5055#		Exit.							#
5056#									#
5057#	7. If |X| > 1, go to 9.						#
5058#									#
5059#	8. (|X|<2**(-40)) If SIN is invoked, return X; 			#
5060#		otherwise return 1.					#
5061#									#
5062#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, 		#
5063#		go back to 3.						#
5064#									#
5065#	SINCOS:								#
5066#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5067#									#
5068#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5069#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
5070#									#
5071#	3. If k is even, go to 5.					#
5072#									#
5073#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.	#
5074#		j1 exclusive or with the l.s.b. of k.			#
5075#		sgn1 := (-1)**j1, sgn2 := (-1)**j2.			#
5076#		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
5077#		sin(r) and cos(r) are computed as odd and even 		#
5078#		polynomials in r, respectively. Exit			#
5079#									#
5080#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
5081#		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
5082#		sin(r) and cos(r) are computed as odd and even 		#
5083#		polynomials in r, respectively. Exit			#
5084#									#
5085#	6. If |X| > 1, go to 8.						#
5086#									#
5087#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
5088#									#
5089#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, 		#
5090#		go back to 2.						#
5091#									#
5092#########################################################################
5093
#--Coefficients of the sin/cos polynomial kernels, highest degree first.
#--SINA7-SINA3 and COSB8-COSB4 are read as doubles (fmov.d/fadd.d), so
#--only the first two longs of SINA3 and COSB4 are fetched; SINA2, SINA1,
#--COSB3 and COSB2 are read as extended (fadd.x); COSB1 is read as a
#--single (fadd.s).
5094SINA7:	long		0xBD6AAA77,0xCCC994F5
5095SINA6:	long		0x3DE61209,0x7AAE8DA1
5096SINA5:	long		0xBE5AE645,0x2A118AE4
5097SINA4:	long		0x3EC71DE3,0xA5341531
5098SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5099SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
5100SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5101
5102COSB8:	long		0x3D2AC4D0,0xD6011EE3
5103COSB7:	long		0xBDA9396F,0x9F45AC19
5104COSB6:	long		0x3E21EED9,0x0612C972
5105COSB5:	long		0xBE927E4F,0xB79D9FCF
5106COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5107COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5108COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5109COSB1:	long		0xBF000000		# B1 = -1/2 in single precision
5110
#--Symbolic names for the a6-relative scratch slots used by the sin/cos
#--code. Several names share one slot: INARG, X and RPRIME all map to
#--FP_SCR0; POSNEG1 and TWOTO63 map to L_SCR1; ENDFLAG and INT map to
#--L_SCR2. A slot therefore holds only one of its aliases at a time.
5111	set		INARG,FP_SCR0
5112
5113	set		X,FP_SCR0
5114#	set		XDCARE,X+2
5115	set		XFRAC,X+4
5116
5117	set		RPRIME,FP_SCR0
5118	set		SPRIME,FP_SCR1
5119
5120	set		POSNEG1,L_SCR1
5121	set		TWOTO63,L_SCR1
5122
5123	set		ENDFLAG,L_SCR2
5124	set		INT,L_SCR2
5125
5126	set		ADJN,L_SCR3
5127
5128############################################
5129	global		ssin
#--ssin(): sine entry point for a normalized operand (a0 -> X, d0 = user
#--rounding precision/mode). Clears ADJN and joins the code shared with
#--scos() at SINBGN.
5130ssin:
5131	mov.l		&0,ADJN(%a6)		# sine requested: ADJN := 0
5132	bra.b		SINBGN
5133
5134############################################
5135	global		scos
#--scos(): cosine entry point for a normalized operand (a0 -> X, d0 = user
#--rounding precision/mode). Sets ADJN to 1 and falls through to SINBGN.
5136scos:
5137	mov.l		&1,ADJN(%a6)		# cosine requested: ADJN := 1
5138
5139############################################
5140SINBGN:
5141#--LOAD X, KEEP A COPY AT X(a6), THEN CHECK IF |X| IS TOO SMALL OR LARGE
5142
5143	fmov.x		(%a0),%fp0		# LOAD INPUT
5144	fmov.x		%fp0,X(%a6)		# save input at X
5145
5146# "COMPACTIFY" X: d1 = (sign/exponent word << 16) | top 16 mantissa bits
5147	mov.l		(%a0),%d1		# put exp in hi word
5148	mov.w		4(%a0),%d1		# fetch hi(man)
5149	and.l		&0x7FFFFFFF,%d1		# strip sign
5150
5151	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
5152	bge.b		SOK1			# yes; go check the upper bound
5153	bra.w		SINSM			# no; input is very small
5154
5155SOK1:
5156	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
5157	blt.b		SINMAIN			# yes; use the table look up
5158	bra.w		SREDUCEX		# no; input is very large
5159
5160#--THIS IS THE USUAL CASE, |X| < 15 PI.
5161#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--ON EXIT: INT(a6) = N, fp0 = R = X - N*PI/2; FALLS THROUGH TO SINCONT.
5162SINMAIN:
5163	fmov.x		%fp0,%fp1
5164	fmul.d		TWOBYPI(%pc),%fp1 	# X*2/PI
5165
5166	lea		PITBL+0x200(%pc),%a1 	# a1 -> N = 0 entry of the N*PI/2 table (N = -32,...,32)
5167
5168	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5169
5170	mov.l		INT(%a6),%d1		# make a copy of N
5171	asl.l		&4,%d1			# N *= 16 (each table entry is 16 bytes)
5172	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)
5173
5174# A1 IS THE ADDRESS OF N*PIBY2
5175# ...WHICH IS IN TWO PIECES Y1 (EXTENDED) & Y2 (SINGLE)
5176	fsub.x		(%a1)+,%fp0 		# X-Y1
5177	fsub.s		(%a1),%fp0 		# fp0 = R = (X-Y1)-Y2
5178
5179SINCONT:
5180#--continuation from REDUCEX
#--(SRESTORE branches here when ADJN < 4; INT(a6) holds N and fp0 holds R)
5181
5182#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5183	mov.l		INT(%a6),%d1
5184	add.l		ADJN(%a6),%d1		# d1 = N + ADJN
5185	ror.l		&1,%d1			# l.s.b. of N+ADJN moves to the sign bit of d1
5186	cmp.l		%d1,&0
5187	blt.w		COSPOLY			# N+ADJN odd: cosine polynomial
5188
5189#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5190#--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5191#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5192#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5193#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5194#--WHERE T=S*S.
5195#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5196#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--ON ENTRY: FP0 = R, D1 = (N+ADJN) ROTATED RIGHT BY ONE BIT, D0 = USER
#--ROUNDING PRECISION/MODE. FP2/FP3 ARE PRESERVED; FP1 IS NOT.
5197SINPOLY:
5198	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5199
5200	fmov.x		%fp0,X(%a6)		# X IS R
5201	fmul.x		%fp0,%fp0		# FP0 IS S
5202
5203	fmov.d		SINA7(%pc),%fp3
5204	fmov.d		SINA6(%pc),%fp2
5205
5206	fmov.x		%fp0,%fp1
5207	fmul.x		%fp1,%fp1		# FP1 IS T
5208
5209	ror.l		&1,%d1
5210	and.l		&0x80000000,%d1
5211# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5212	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R
5213
5214	fmul.x		%fp1,%fp3		# TA7
5215	fmul.x		%fp1,%fp2		# TA6
5216
5217	fadd.d		SINA5(%pc),%fp3		# A5+TA7
5218	fadd.d		SINA4(%pc),%fp2		# A4+TA6
5219
5220	fmul.x		%fp1,%fp3		# T(A5+TA7)
5221	fmul.x		%fp1,%fp2		# T(A4+TA6)
5222
5223	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
5224	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)
5225
5226	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
5227
5228	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
5229	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
5230	fmul.x		X(%a6),%fp0		# R'*S
5231
5232	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5233
5234	fmul.x		%fp1,%fp0		# SIN(R')-R'
5235
5236	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5237
5238	fmov.l		%d0,%fpcr		# restore users round mode,prec
5239	fadd.x		X(%a6),%fp0		# last inst - possible exception set
5240	bra		t_inx2
5241
5242#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5243#--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5244#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5245#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5246#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5247#--WHERE T=S*S.
5248#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5249#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5250#--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--ON ENTRY: FP0 = R, D1 = (N+ADJN) ROTATED RIGHT BY ONE BIT, D0 = USER
#--ROUNDING PRECISION/MODE. FP2/FP3 ARE PRESERVED; FP1 IS NOT.
5251COSPOLY:
5252	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
5253
5254	fmul.x		%fp0,%fp0		# FP0 IS S
5255
5256	fmov.d		COSB8(%pc),%fp2
5257	fmov.d		COSB7(%pc),%fp3
5258
5259	fmov.x		%fp0,%fp1
5260	fmul.x		%fp1,%fp1		# FP1 IS T
5261
5262	fmov.x		%fp0,X(%a6)		# X IS S
5263	ror.l		&1,%d1
5264	and.l		&0x80000000,%d1
5265# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5266
5267	fmul.x		%fp1,%fp2		# TB8
5268
5269	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
5270	and.l		&0x80000000,%d1		# (d1 was already masked above)
5271
5272	fmul.x		%fp1,%fp3		# TB7
5273
5274	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
5275	mov.l		%d1,POSNEG1(%a6)
5276
5277	fadd.d		COSB6(%pc),%fp2		# B6+TB8
5278	fadd.d		COSB5(%pc),%fp3		# B5+TB7
5279
5280	fmul.x		%fp1,%fp2		# T(B6+TB8)
5281	fmul.x		%fp1,%fp3		# T(B5+TB7)
5282
5283	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
5284	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)
5285
5286	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
5287	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))
5288
5289	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
5290	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))
5291
5292	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))
5293
5294	fadd.x		%fp1,%fp0		# [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5295
5296	fmul.x		X(%a6),%fp0		# S'*(...) = SGN*COS(R) - SGN
5297
5298	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
5299
5300	fmov.l		%d0,%fpcr		# restore users round mode,prec
5301	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
5302	bra		t_inx2
5303
5304##############################################
5305
5306# SINe: Big OR Small?
5307#--IF |X| > 1, WE USE THE GENERAL ARGUMENT REDUCTION.
5308#--OTHERWISE FALL INTO SINSM, WHICH RETURNS X (SINE) OR 1 (COSINE).
#--NOTE(review): no branch to SINBORS is visible in this part of the file;
#--SINBGN branches to SINSM and SREDUCEX directly.
5309SINBORS:
5310	cmp.l		%d1,&0x3FFF8000		# compact |X| against 1.0
5311	bgt.l		SREDUCEX		# |X| > 1: general reduction
5312
#--|X| < 2**(-40): sin(X) is returned as X, cos(X) as 1.
#--ADJN is 0 (ssin) or 1 (scos) on the paths that reach this label.
5313SINSM:
5314	mov.l		ADJN(%a6),%d1
5315	cmp.l		%d1,&0
5316	bgt.b		COSTINY			# ADJN > 0: cosine was requested
5317
5318# here, the operation may underflow iff the precision is sgl or dbl.
5319# extended denorms are handled through another entry point.
5320SINTINY:
5321#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE
5322
5323	fmov.l		%d0,%fpcr		# restore users round mode,prec
5324	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5325	fmov.x		X(%a6),%fp0		# last inst - possible exception set
5326	bra		t_catch
5327
#--cos(X) for |X| < 2**(-40): 1.0 plus a tiny negative single, rounded
#--under the user's FPCR.
5328COSTINY:
5329	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5330	fmov.l		%d0,%fpcr		# restore users round mode,prec
5331	fadd.s 		&0x80800000,%fp0	# add -2**(-126); last inst - possible exception set
5332	bra		t_pinx2
5333
5334################################################
5335	global		ssind
5336#--SIN(X) = X FOR DENORMALIZED X
#--The whole case is handed to t_extdnrm (defined outside this section).
5337ssind:
5338	bra		t_extdnrm
5339
5340############################################
5341	global		scosd
5342#--COS(X) = 1 FOR DENORMALIZED X
#--fp0 is loaded with 1.0 and the exit is taken through t_pinx2 (defined
#--outside this section).
5343scosd:
5344	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
5345	bra		t_pinx2
5346
5347##################################################
5348
5349	global		ssincos
#--ssincos(): combined sine/cosine entry point for a normalized operand
#--(a0 -> X, d0 = user rounding precision/mode).
5350ssincos:
5351#--SET ADJN TO 4 (SRESTORE tests ADJN against 4 to return to SCCONT)
5352	mov.l		&4,ADJN(%a6)
5353
5354	fmov.x		(%a0),%fp0		# LOAD INPUT
5355	fmov.x		%fp0,X(%a6)		# save input at X
5356
5357	mov.l		(%a0),%d1		# put exp in hi word
5358	mov.w		4(%a0),%d1		# fetch hi(man)
5359	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X
5360
5361	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
5362	bge.b		SCOK1			# yes; go check the upper bound
5363	bra.w		SCSM			# no; input is very small
5364
5365SCOK1:
5366	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5367	blt.b		SCMAIN			# yes; use the table look up
5368	bra.w		SREDUCEX		# no; input is very large
5369
5370
5371#--THIS IS THE USUAL CASE, |X| < 15 PI.
5372#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--ON EXIT: INT(a6) = N, fp0 = R = X - N*PI/2; FALLS THROUGH TO SCCONT.
5373SCMAIN:
5374	fmov.x		%fp0,%fp1
5375
5376	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5377
5378	lea		PITBL+0x200(%pc),%a1	# a1 -> N = 0 entry of the N*PI/2 table (N = -32,...,32)
5379
5380	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
5381
5382	mov.l		INT(%a6),%d1		# d1 = N
5383	asl.l		&4,%d1			# N *= 16 (each table entry is 16 bytes)
5384	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2
5385
5386	fsub.x		(%a1)+,%fp0		# X-Y1
5387	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5388
5389SCCONT:
5390#--continuation point from REDUCEX
#--(SRESTORE branches here when ADJN >= 4; INT(a6) holds N and fp0 holds R)
5391
5392	mov.l		INT(%a6),%d1
5393	ror.l		&1,%d1			# l.s.b. of N moves to the sign bit of d1
5394	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
5395	bge.w		NEVEN
5396
#--N IS ODD: SIN(X) = sgn1*COS(R) IS BUILT IN FP0 AND COS(X) = sgn2*SIN(R)
#--IN FP1. ON ENTRY FP0 = R AND D1 = N ROTATED RIGHT BY ONE BIT.
5397SNODD:
5398#--FP2 IS SAVED HERE; D2 IS SAVED AND RESTORED AROUND ITS USE BELOW.
5399	fmovm.x		&0x04,-(%sp)		# save fp2
5400
5401	fmov.x		%fp0,RPRIME(%a6)
5402	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5403	fmov.d		SINA7(%pc),%fp1		# A7
5404	fmov.d		COSB8(%pc),%fp2		# B8
5405	fmul.x		%fp0,%fp1		# SA7
5406	fmul.x		%fp0,%fp2		# SB8
5407
5408	mov.l		%d2,-(%sp)		# save d2
5409	mov.l		%d1,%d2			# d2 = N rotated right by one
5410	ror.l		&1,%d2			# sign bit of d2 = j1 (bit 1 of N)
5411	and.l		&0x80000000,%d2
5412	eor.l		%d1,%d2			# sign bit = j1 EOR (l.s.b. of N) = j2
5413	and.l		&0x80000000,%d2		# d2 = sign mask for sgn2
5414
5415	fadd.d		SINA6(%pc),%fp1		# A6+SA7
5416	fadd.d		COSB7(%pc),%fp2		# B7+SB8
5417
5418	fmul.x		%fp0,%fp1		# S(A6+SA7)
5419	eor.l		%d2,RPRIME(%a6)		# R' = sgn2*R
5420	mov.l		(%sp)+,%d2		# restore d2
5421	fmul.x		%fp0,%fp2		# S(B7+SB8)
5422	ror.l		&1,%d1			# sign bit of d1 = j1
5423	and.l		&0x80000000,%d1		# d1 = sign mask for sgn1
5424	mov.l		&0x3F800000,POSNEG1(%a6)	# 1.0 in single
5425	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = sgn1*1.0
5426
5427	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
5428	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)
5429
5430	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
5431	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
5432	fmov.x		%fp0,SPRIME(%a6)
5433
5434	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
5435	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
5436	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))
5437
5438	fmul.x		%fp0,%fp1		# S(A4+...)
5439	fmul.x		%fp0,%fp2		# S(B5+...)
5440
5441	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
5442	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)
5443
5444	fmul.x		%fp0,%fp1		# S(A3+...)
5445	fmul.x		%fp0,%fp2		# S(B4+...)
5446
5447	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
5448	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)
5449
5450	fmul.x		%fp0,%fp1		# S(A2+...)
5451	fmul.x		%fp0,%fp2		# S(B3+...)
5452
5453	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
5454	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)
5455
5456	fmul.x		%fp0,%fp1		# S(A1+...)
5457	fmul.x		%fp2,%fp0		# S(B2+...)
5458
5459	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
5460	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
5461	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))
5462
5463	fmovm.x		(%sp)+,&0x20		# restore fp2
5464
5465	fmov.l		%d0,%fpcr		# restore users round mode,prec
5466	fadd.x		RPRIME(%a6),%fp1	# COS(X)
5467	bsr		sto_cos			# store cosine result
5468	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
5469	bra		t_inx2
5470
#--N IS EVEN: SIN(X) = sgn1*SIN(R) IS BUILT IN FP0 AND COS(X) = sgn1*COS(R)
#--IN FP1. ON ENTRY FP0 = R AND D1 = N ROTATED RIGHT BY ONE BIT.
5471NEVEN:
5472#--FP2 IS SAVED HERE AND RESTORED BEFORE THE FINAL ADDS.
5473	fmovm.x		&0x04,-(%sp)		# save fp2
5474
5475	fmov.x		%fp0,RPRIME(%a6)
5476	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
5477
5478	fmov.d		COSB8(%pc),%fp1		# B8
5479	fmov.d		SINA7(%pc),%fp2		# A7
5480
5481	fmul.x		%fp0,%fp1		# SB8
5482	fmov.x		%fp0,SPRIME(%a6)
5483	fmul.x		%fp0,%fp2		# SA7
5484
5485	ror.l		&1,%d1			# sign bit of d1 = j1 (bit 1 of N)
5486	and.l		&0x80000000,%d1		# d1 = sign mask for sgn1
5487
5488	fadd.d		COSB7(%pc),%fp1		# B7+SB8
5489	fadd.d		SINA6(%pc),%fp2		# A6+SA7
5490
5491	eor.l		%d1,RPRIME(%a6)		# R' = sgn1*R
5492	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
5493
5494	fmul.x		%fp0,%fp1		# S(B7+SB8)
5495
5496	or.l		&0x3F800000,%d1		# d1 = sgn1*1.0 in single
5497	mov.l		%d1,POSNEG1(%a6)
5498
5499	fmul.x		%fp0,%fp2		# S(A6+SA7)
5500
5501	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
5502	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)
5503
5504	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
5505	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))
5506
5507	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
5508	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))
5509
5510	fmul.x		%fp0,%fp1		# S(B5+...)
5511	fmul.x		%fp0,%fp2		# S(A4+...)
5512
5513	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
5514	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)
5515
5516	fmul.x		%fp0,%fp1		# S(B4+...)
5517	fmul.x		%fp0,%fp2		# S(A3+...)
5518
5519	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
5520	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)
5521
5522	fmul.x		%fp0,%fp1		# S(B3+...)
5523	fmul.x		%fp0,%fp2		# S(A2+...)
5524
5525	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
5526	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)
5527
5528	fmul.x		%fp0,%fp1		# S(B2+...)
5529	fmul.x		%fp2,%fp0		# S(A1+...)
5530
5531
5532	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
5533	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
5534	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))
5535
5536	fmovm.x		(%sp)+,&0x20		# restore fp2
5537
5538	fmov.l		%d0,%fpcr		# restore users round mode,prec
5539	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
5540	bsr		sto_cos			# store cosine result
5541	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
5542	bra		t_inx2
5543
5544################################################
5545
#--SINCOS: Big OR Small? |X| > 1 goes to the general argument reduction;
#--anything else falls through to SCSM.
#--NOTE(review): no branch to SCBORS is visible in this part of the file;
#--ssincos branches to SCSM and SREDUCEX directly.
5546SCBORS:
5547	cmp.l		%d1,&0x3FFF8000		# compact |X| against 1.0
5548	bgt.w		SREDUCEX		# |X| > 1: general reduction
5549
5550################################################
5551
#--|X| < 2**(-40): COS(X) is 1.0 minus a tiny single, rounded under the
#--user's FPCR and passed to sto_cos in fp1; SIN(X) is X itself, moved
#--into fp0 from the copy saved at X(a6).
5552SCSM:
5553#	mov.w		&0x0000,XDCARE(%a6)
5554	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
5555
5556	fmov.l		%d0,%fpcr		# restore users round mode,prec
5557	fsub.s		&0x00800000,%fp1	# subtract 2**(-126)
5558	bsr		sto_cos			# store cosine result
5559	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
5560	mov.b		&FMOV_OP,%d1		# last inst is MOVE
5561	fmov.x		X(%a6),%fp0		# fp0 = X
5562	bra		t_catch
5563
5564##############################################
5565
5566	global		ssincosd
5567#--SIN AND COS OF X FOR DENORMALIZED X
#--COS(X) = 1.0 is passed to sto_cos in fp1; the sine result is then left
#--to t_extdnrm. d0 is saved across the sto_cos call and restored
#--afterwards (presumably sto_cos may change it -- it is defined outside
#--this section).
5568ssincosd:
5569	mov.l		%d0,-(%sp)		# save d0
5570	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
5571	bsr		sto_cos			# store cosine result
5572	mov.l		(%sp)+,%d0		# restore d0
5573	bra		t_extdnrm
5574
5575############################################
5576
5577#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5578#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5579#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: FP0 = X, D1 = COMPACT FORM OF |X|. FP2-FP5 AND D2 ARE SAVED
#--HERE AND RESTORED AT SRESTORE.
5580SREDUCEX:
5581	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
5582	mov.l		%d2,-(%sp)		# save d2
5583	fmov.s		&0x00000000,%fp1	# fp1 = 0
5584
5585#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5586#--there is a danger of unwanted overflow in first LOOP iteration.  In this
5587#--case, reduce argument by one remainder step to make subsequent reduction
5588#--safe.
5589	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
5590	bne.b		SLOOP			# no
5591
5592# yes; create 2**16383*PI/2
5593	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
5594	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
5595	clr.l		FP_SCR0_LO(%a6)
5596
5597# create low half of 2**16383*PI/2 at FP_SCR1
5598	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
5599	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
5600	clr.l		FP_SCR1_LO(%a6)
5601
5602	ftest.x		%fp0			# test sign of argument
5603	fblt.w		sred_neg		# negative arg: add the positive constants
5604
5605	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg: set the sign bits so
5606	or.b		&0x80,FP_SCR1_EX(%a6)	# the adds below subtract
5607sred_neg:
5608	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
5609	fmov.x		%fp0,%fp1		# save high result in fp1
5610	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
5611	fsub.x		%fp0,%fp1		# determine low component of result
5612	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
5613
5614#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5615#--integer quotient will be stored in N
5616#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5617SLOOP:
5618	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
5619	mov.w		INARG(%a6),%d1		# low word of d1 = sign/exponent word
5620	mov.l		%d1,%a1			# save a copy of d1 (sign is used in SWORK)
5621	and.l		&0x00007FFF,%d1
5622	sub.l		&0x00003FFF,%d1		# d1 = K
5623	cmp.l		%d1,&28
5624	ble.b		SLASTLOOP		# K <= 28: this is the final pass
5625SCONTLOOP:
5626	sub.l		&27,%d1			# d1 = L := K-27
5627	mov.b		&0,ENDFLAG(%a6)		# another pass will follow
5628	bra.b		SWORK
5629SLASTLOOP:
5630	clr.l		%d1			# d1 = L := 0
5631	mov.b		&1,ENDFLAG(%a6)		# mark the final pass
5632
#--ONE REDUCTION PASS. ON ENTRY: (FP0,FP1) = (R,r), D1 = L, A1 = COPY OF
#--THE SIGN/EXPONENT WORD OF R, ENDFLAG = 1 ON THE FINAL PASS.
5633SWORK:
5634#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
5635#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
5636
5637#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5638#--2**L * (PIby2_1), 2**L * (PIby2_2)
5639
5640	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
5641	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
5642
5643	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
5644	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
5645	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
5646
5647	fmov.x		%fp0,%fp2
5648	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
5649
5650#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5651#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
5652#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5653#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
5654#--US THE DESIRED VALUE IN FLOATING POINT.
5655	mov.l		%a1,%d2
5656	swap		%d2			# sign/exponent word -> upper word
5657	and.l		&0x80000000,%d2		# keep only SIGN(INARG)
5658	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
5659	mov.l		%d2,TWOTO63(%a6)
5660	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
5661	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
5662#	fint.x		%fp2
5663
5664#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5665	mov.l		%d1,%d2			# d2 = L
5666
5667	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
5668	mov.w		%d2,FP_SCR0_EX(%a6)
5669	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
5670	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
5671
5672	add.l		&0x00003FDD,%d1		# BIASED EXP OF 2**L * Piby2_2
5673	mov.w		%d1,FP_SCR1_EX(%a6)
5674	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
5675	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
5676
5677	mov.b		ENDFLAG(%a6),%d1	# d1 = final-pass flag, tested below
5678
5679#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5680#--P2 = 2**(L) * Piby2_2
5681	fmov.x		%fp2,%fp4		# fp4 = N
5682	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
5683	fmov.x		%fp2,%fp5		# fp5 = N
5684	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
5685	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
5686
5687#--we want P+p = W+w  but  |p| <= half ulp of P
5688#--Then, we need to compute  A := R-P   and  a := r-p
5689	fadd.x		%fp5,%fp3		# fp3 = P
5690	fsub.x		%fp3,%fp4		# fp4 = W-P
5691
5692	fsub.x		%fp3,%fp0		# fp0 = A := R - P
5693	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
5694
5695	fmov.x		%fp0,%fp3		# fp3 = A
5696	fsub.x		%fp4,%fp1		# fp1 = a := r - p
5697
5698#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
5699#--|r| <= half ulp of R.
5700	fadd.x		%fp1,%fp0		# fp0 = R := A+a
5701#--No need to calculate r if this is the last loop
5702	cmp.b		%d1,&0
5703	bgt.w		SRESTORE		# ENDFLAG set: reduction finished
5704
5705#--Need to calculate r
5706	fsub.x		%fp0,%fp3		# fp3 = A-R
5707	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
5708	bra.w		SLOOP
5709
#--END OF THE GENERAL REDUCTION: STORE N, UNDO THE SAVES MADE AT SREDUCEX
#--AND RETURN TO THE CALLER'S CONTINUATION POINT WITH FP0 = R.
5710SRESTORE:
5711	fmov.l		%fp2,INT(%a6)		# INT = N as an integer
5712	mov.l		(%sp)+,%d2		# restore d2
5713	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
5714
5715	mov.l		ADJN(%a6),%d1
5716	cmp.l		%d1,&4			# ADJN is 4 only when entered via ssincos
5717
5718	blt.w		SINCONT			# ADJN < 4: sine or cosine alone
5719	bra.w		SCCONT			# otherwise: sine and cosine
5720
5721#########################################################################
5722# stan():  computes the tangent of a normalized input			#
5723# stand(): computes the tangent of a denormalized input			#
5724#									#
5725# INPUT *************************************************************** #
5726#	a0 = pointer to extended precision input			#
5727#	d0 = round precision,mode					#
5728#									#
5729# OUTPUT ************************************************************** #
5730#	fp0 = tan(X)							#
5731#									#
5732# ACCURACY and MONOTONICITY ******************************************* #
5733#	The returned result is within 3 ulp in 64 significant bits,	#
5734#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
5735#	rounded to double precision. The result is provably monotonic	#
5736#	in double precision.						#
5737#									#
5738# ALGORITHM *********************************************************** #
5739#									#
5740#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
5741#									#
5742#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
5743#		k = N mod 2, so in particular, k = 0 or 1.		#
5744#									#
5745#	3. If k is odd, go to 5.					#
5746#									#
5747#	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
5748#		rational function U/V where				#
5749#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5750#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.	#
5751#		Exit.							#
5752#									#
5753#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5754#		a rational function U/V where				#
5755#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
5756#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
5757#		-Cot(r) = -V/U. Exit.					#
5758#									#
5759#	6. If |X| > 1, go to 8.						#
5760#									#
5761#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
5762#									#
5763#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back 	#
5764#		to 2.							#
5765#									#
5766#########################################################################
5767
#--Coefficients of the rational approximation tan(r) ~ U/V used by
#--TANCONT and NODD: P1-P3 belong to U, Q1-Q4 to V. TANQ4, TANP3 and
#--TANQ3 are read as doubles (fmov.d/fadd.d), so only the first two longs
#--of TANQ3 are fetched; TANP2, TANQ2, TANP1 and TANQ1 are read as
#--extended (fadd.x).
5768TANQ4:
5769	long		0x3EA0B759,0xF50F8688
5770TANP3:
5771	long		0xBEF2BAA5,0xA8924F04
5772
5773TANQ3:
5774	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5775
5776TANP2:
5777	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5778
5779TANQ2:
5780	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5781
5782TANP1:
5783	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5784
5785TANQ1:
5786	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5787
#--NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced by any
#--instruction visible in this part of the file -- confirm whether they
#--are still used elsewhere.
5788INVTWOPI:
5789	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5790
5791TWOPI1:
5792	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
5793TWOPI2:
5794	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5795
5796#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5797#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5798#--MOST 69 BITS LONG.
#--EACH ENTRY IS 16 BYTES: THREE LONGS FOR THE EXTENDED LEADING TERM (Y1)
#--FOLLOWED BY ONE LONG FOR THE SINGLE TRAILING TERM (Y2). THE N = 0 ENTRY
#--IS AT PITBL+0x200, WHICH IS THE BASE THE LOOK-UP CODE INDEXES FROM
#--WITH N*16.
5799#	global		PITBL
5800PITBL:
5801	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5802	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5803	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5804	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5805	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5806	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5807	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5808	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5809	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5810	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
5811	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5812	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5813	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5814	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5815	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5816	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5817	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5818	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5819	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5820	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5821	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5822	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5823	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5824	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5825	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5826	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5827	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5828	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5829	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5830	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5831	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5832	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5833	long		0x00000000,0x00000000,0x00000000,0x00000000
5834	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5835	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5836	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5837	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5838	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5839	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5840	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5841	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5842	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5843	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5844	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5845	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5846	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5847	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5848	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5849	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5850	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5851	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5852	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5853	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5854	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5855	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5856	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
5857	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5858	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5859	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5860	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5861	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5862	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5863	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5864	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5865	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5866
#--Scratch-slot names for the tangent code. These `set`s re-bind names
#--already used by the sine/cosine code: INT now maps to L_SCR1 (it was
#--L_SCR2 there) and so shares a slot with TWOTO63; ENDFLAG stays on
#--L_SCR2.
5867	set		INARG,FP_SCR0
5868
5869	set		TWOTO63,L_SCR1
5870	set		INT,L_SCR1
5871	set		ENDFLAG,L_SCR2
5872
5873	global		stan
#--stan(): tangent entry point for a normalized operand (a0 -> X, d0 = user
#--rounding precision/mode). Classifies |X| and dispatches to TANSM,
#--TANMAIN or REDUCEX.
5874stan:
5875	fmov.x		(%a0),%fp0		# LOAD INPUT
5876
5877	mov.l		(%a0),%d1		# put exp in hi word
5878	mov.w		4(%a0),%d1		# fetch hi(man)
5879	and.l		&0x7FFFFFFF,%d1		# strip sign: d1 = compact |X|
5880
5881	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
5882	bge.b		TANOK1			# yes; go check the upper bound
5883	bra.w		TANSM			# no; input is very small
5884TANOK1:
5885	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5886	blt.b		TANMAIN			# yes; use the table look up
5887	bra.w		REDUCEX			# no; input is very large
5888
5889TANMAIN:
5890#--THIS IS THE USUAL CASE, |X| < 15 PI.
5891#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--ON EXIT: FP0 = R, SIGN BIT OF D1 SET IFF N IS ODD; FALLS THROUGH TO TANCONT.
5892	fmov.x		%fp0,%fp1
5893	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
5894
5895	lea.l		PITBL+0x200(%pc),%a1	# a1 -> N = 0 entry of the N*PI/2 table (N = -32,...,32)
5896
5897	fmov.l		%fp1,%d1		# CONVERT TO INTEGER
5898
5899	asl.l		&4,%d1			# N *= 16 (each table entry is 16 bytes)
5900	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2
5901
5902	fsub.x		(%a1)+,%fp0		# X-Y1
5903
5904	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5905
5906	ror.l		&5,%d1			# bit 4 of N*16 (l.s.b. of N) -> sign bit
5907	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0
5908
#--ON ENTRY: FP0 = R, D1 < 0 IFF N IS ODD. FOR EVEN N THE RESULT IS
#--TAN(R) = U/V WITH U = R+R*S*(P1+S*(P2+S*P3)) AND
#--V = 1+S*(Q1+S*(Q2+S*(Q3+S*Q4))), S = R*R. ODD N IS HANDLED AT NODD.
5909TANCONT:
5910	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3
5911
5912	cmp.l		%d1,&0
5913	blt.w		NODD			# N odd: compute -cot(R) instead
5914
5915	fmov.x		%fp0,%fp1
5916	fmul.x		%fp1,%fp1		# S = R*R
5917
5918	fmov.d		TANQ4(%pc),%fp3
5919	fmov.d		TANP3(%pc),%fp2
5920
5921	fmul.x		%fp1,%fp3		# SQ4
5922	fmul.x		%fp1,%fp2		# SP3
5923
5924	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
5925	fadd.x		TANP2(%pc),%fp2		# P2+SP3
5926
5927	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
5928	fmul.x		%fp1,%fp2		# S(P2+SP3)
5929
5930	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
5931	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
5932
5933	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
5934	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))
5935
5936	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
5937	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))
5938
5939	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))
5940
5941	fadd.x		%fp2,%fp0		# U = R+RS(P1+S(P2+SP3))
5942
5943	fadd.s		&0x3F800000,%fp1	# V = 1+S(Q1+...)
5944
5945	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
5946
5947	fmov.l		%d0,%fpcr		# restore users round mode,prec
5948	fdiv.x		%fp1,%fp0		# U/V; last inst - possible exception set
5949	bra		t_inx2
5950
#--N IS ODD: RETURN -COT(R) = V/(-U). ENTERED FROM TANCONT WITH FP2/FP3
#--ALREADY SAVED ON THE STACK AND FP0 = R. HERE FP1 HOLDS R (THEN U) AND
#--FP0 HOLDS S (THEN V).
5951NODD:
5952	fmov.x		%fp0,%fp1
5953	fmul.x		%fp0,%fp0		# S = R*R
5954
5955	fmov.d		TANQ4(%pc),%fp3
5956	fmov.d		TANP3(%pc),%fp2
5957
5958	fmul.x		%fp0,%fp3		# SQ4
5959	fmul.x		%fp0,%fp2		# SP3
5960
5961	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
5962	fadd.x		TANP2(%pc),%fp2		# P2+SP3
5963
5964	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
5965	fmul.x		%fp0,%fp2		# S(P2+SP3)
5966
5967	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
5968	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
5969
5970	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
5971	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))
5972
5973	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
5974	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))
5975
5976	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))
5977
5978	fadd.x		%fp2,%fp1		# U = R+RS(P1+S(P2+SP3))
5979	fadd.s		&0x3F800000,%fp0	# V = 1+S(Q1+...)
5980
5981	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
5982
5983	fmov.x		%fp1,-(%sp)		# push U
5984	eor.l		&0x80000000,(%sp)	# flip its sign: -U
5985
5986	fmov.l		%d0,%fpcr		# restore users round mode,prec
5987	fdiv.x		(%sp)+,%fp0		# V/(-U); last inst - possible exception set
5988	bra		t_inx2
5989
5990TANBORS:
5991#--IF |X| > 1, WE USE THE GENERAL ARGUMENT REDUCTION.
5992#--OTHERWISE FALL INTO TANSM, WHICH RETURNS X.
#--NOTE(review): no branch to TANBORS is visible in this part of the file;
#--stan branches to TANSM and REDUCEX directly.
5993	cmp.l		%d1,&0x3FFF8000		# compact |X| against 1.0
5994	bgt.b		REDUCEX			# |X| > 1: general reduction
5995
#--|X| < 2**(-40): TAN(X) = X. The operand is pushed and reloaded under the
#--user's FPCR so that the final move is the instruction reported to
#--t_catch.
5996TANSM:
5997	fmov.x		%fp0,-(%sp)		# push X
5998	fmov.l		%d0,%fpcr		# restore users round mode,prec
5999	mov.b		&FMOV_OP,%d1		# last inst is MOVE
6000	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
6001	bra		t_catch
6002
6003	global		stand
6004#--TAN(X) = X FOR DENORMALIZED X
#--The whole case is handed to t_extdnrm (defined outside this section).
6005stand:
6006	bra		t_extdnrm
6007
6008#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6009#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6010#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
6011REDUCEX:
6012	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
6013	mov.l		%d2,-(%sp)		# save d2
6014	fmov.s		&0x00000000,%fp1	# fp1 = 0
6015
6016#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6017#--there is a danger of unwanted overflow in first LOOP iteration.  In this
6018#--case, reduce argument by one remainder step to make subsequent reduction
6019#--safe.
6020	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
6021	bne.b		LOOP			# no
6022
6023# yes; create 2**16383*PI/2
6024	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
6025	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
6026	clr.l		FP_SCR0_LO(%a6)
6027
6028# create low half of 2**16383*PI/2 at FP_SCR1
6029	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
6030	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
6031	clr.l		FP_SCR1_LO(%a6)
6032
6033	ftest.x		%fp0			# test sign of argument
6034	fblt.w		red_neg
6035
6036	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
6037	or.b		&0x80,FP_SCR1_EX(%a6)
6038red_neg:
6039	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
6040	fmov.x		%fp0,%fp1		# save high result in fp1
6041	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
6042	fsub.x		%fp0,%fp1		# determine low component of result
6043	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
6044
6045#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6046#--integer quotient will be stored in N
6047#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
6048LOOP:
6049	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
6050	mov.w		INARG(%a6),%d1		# d1.w = sign/exponent word of the stored value
6051	mov.l		%d1,%a1			# save a copy of d1 (sign/exponent word) in a1
6052	and.l		&0x00007FFF,%d1		# drop the sign bit: d1 = biased exponent
6053	sub.l		&0x00003FFF,%d1		# d1 = K
6054	cmp.l		%d1,&28			# K <= 28 ?
6055	ble.b		LASTLOOP		# yes: this is the final pass
6056CONTLOOP:
6057	sub.l		&27,%d1			# d1 = L := K-27
6058	mov.b		&0,ENDFLAG(%a6)		# ENDFLAG = 0: another pass will follow
6059	bra.b		WORK
6060LASTLOOP:
6061	clr.l		%d1			# d1 = L := 0
6062	mov.b		&1,ENDFLAG(%a6)		# ENDFLAG = 1: final pass
6063
6064WORK:
6065#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
6066#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
6067
6068#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6069#--2**L * (PIby2_1), 2**L * (PIby2_2)
6070
6071	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
6072	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
6073
6074	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
6075	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
6076	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
6077
6078	fmov.x		%fp0,%fp2		# fp2 = R
6079	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
6080
6081#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6082#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
6083#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6084#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
6085#--US THE DESIRED VALUE IN FLOATING POINT.
6086	mov.l		%a1,%d2			# d2 = saved sign/exponent word of INARG
6087	swap		%d2			# move that word into the upper half of d2
6088	and.l		&0x80000000,%d2		# keep only the sign bit
6089	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
6090	mov.l		%d2,TWOTO63(%a6)
6091	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
6092	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
6093#	fintrz.x	%fp2,%fp2
6094
6095#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6096	mov.l		%d1,%d2			# d2 = L
6097
6098	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
6099	mov.w		%d2,FP_SCR0_EX(%a6)
6100	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
6101	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
6102
6103	add.l		&0x00003FDD,%d1		# d1 = L + 0x3FDD: BIASED EXP OF 2**L * Piby2_2
6104	mov.w		%d1,FP_SCR1_EX(%a6)
6105	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
6106	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
6107
6108	mov.b		ENDFLAG(%a6),%d1	# d1.b = ENDFLAG, tested after the subtraction below
6109
6110#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6111#--P2 = 2**(L) * Piby2_2
6112	fmov.x		%fp2,%fp4		# fp4 = N
6113	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
6114	fmov.x		%fp2,%fp5		# fp5 = N
6115	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
6116	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
6117
6118#--we want P+p = W+w  but  |p| <= half ulp of P
6119#--Then, we need to compute  A := R-P   and  a := r-p
6120	fadd.x		%fp5,%fp3		# fp3 = P
6121	fsub.x		%fp3,%fp4		# fp4 = W-P
6122
6123	fsub.x		%fp3,%fp0		# fp0 = A := R - P
6124	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
6125
6126	fmov.x		%fp0,%fp3		# fp3 = A
6127	fsub.x		%fp4,%fp1		# fp1 = a := r - p
6128
6129#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
6130#--|r| <= half ulp of R.
6131	fadd.x		%fp1,%fp0		# fp0 = R := A+a
6132#--No need to calculate r if this is the last loop
6133	cmp.b		%d1,&0			# ENDFLAG set?
6134	bgt.w		RESTORE			# yes: reduction is finished
6135
6136#--Need to calculate r
6137	fsub.x		%fp0,%fp3		# fp3 = A-R
6138	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
6139	bra.w		LOOP			# run another reduction pass
6140
6141RESTORE:					# exit path of the reduction loop
6142	fmov.l		%fp2,INT(%a6)		# store N (fp2) as a long integer in INT
6143	mov.l		(%sp)+,%d2		# restore d2
6144	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
6145
6146	mov.l		INT(%a6),%d1		# d1 = N
6147	ror.l		&1,%d1			# rotate right by one: bit 0 of N moves to bit 31 of d1
6148
6149	bra.w		TANCONT			# resume at TANCONT (defined outside this chunk)
6150
6151#########################################################################
6152# satan():  computes the arctangent of a normalized number		#
6153# satand(): computes the arctangent of a denormalized number		#
6154#									#
6155# INPUT	*************************************************************** #
6156#	a0 = pointer to extended precision input			#
6157#	d0 = round precision,mode					#
6158#									#
6159# OUTPUT ************************************************************** #
6160#	fp0 = arctan(X)							#
6161#									#
6162# ACCURACY and MONOTONICITY ******************************************* #
6163#	The returned result is within 2 ulps in	64 significant bits,	#
6164#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6165#	rounded to double precision. The result is provably monotonic	#
6166#	in double precision. 						#
6167#									#
6168# ALGORITHM *********************************************************** #
6169#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
6170#									#
6171#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. 			#
6172#		Note that k = -4, -3,..., or 3.				#
6173#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 	#
6174#		significant bits of X with a bit-1 attached at the 6-th	#
6175#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
6176#									#
6177#	Step 3. Approximate arctan(u) by a polynomial poly.		#
6178#									#
6179#	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a 	#
6180#		table of values calculated beforehand. Exit.		#
6181#									#
6182#	Step 5. If |X| >= 16, go to Step 7.				#
6183#									#
6184#	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
6185#									#
6186#	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd 	#
6187#		polynomial in X'.					#
6188#		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
6189#									#
6190#########################################################################
6191
6192ATANA3:	long		0xBFF6687E,0x314987D8	# A3, read as a double by ATANMAIN
6193ATANA2:	long		0x4002AC69,0x34A26DB3	# A2, read as a double by ATANMAIN
6194ATANA1:	long		0xBFC2476F,0x4E1DA28E	# A1, read as a double by ATANMAIN
6195
6196ATANB6:	long		0x3FB34444,0x7F876989	# B6..B1: doubles used by the ATANSM polynomial
6197ATANB5:	long		0xBFB744EE,0x7FAF45DB
6198ATANB4:	long		0x3FBC71C6,0x46940220
6199ATANB3:	long		0xBFC24924,0x921872F9
6200ATANB2:	long		0x3FC99999,0x99998FA9
6201ATANB1:	long		0xBFD55555,0x55555555
6202
6203ATANC5:	long		0xBFB70BF3,0x98539E6A	# C5..C1: doubles used by the ATANBIG polynomial
6204ATANC4:	long		0x3FBC7187,0x962D1D7D
6205ATANC3:	long		0xBFC24924,0x827107B8
6206ATANC2:	long		0x3FC99999,0x9996263E
6207ATANC1:	long		0xBFD55555,0x55555536
6208
6209PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000	# +PI/2, extended precision
6210NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000	# -PI/2, extended precision
6211
6212PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000	# +TINY: biased exponent 1, mantissa 0x80000000_00000000
6213NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000	# -TINY: same magnitude, sign bit set
6214
6215ATANTBL:					# ATAN(|F|) table: 128 entries of 16 bytes (3 longs used by ATANMAIN + 1 zero long), 16 entries per exponent K
6216	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000	# K = -4 group starts here
6217	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6218	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6219	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6220	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6221	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6222	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6223	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6224	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6225	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6226	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6227	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6228	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6229	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6230	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6231	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6232	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000	# K = -3 group starts here
6233	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6234	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6235	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6236	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6237	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6238	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6239	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6240	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6241	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6242	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6243	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6244	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6245	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6246	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6247	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6248	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000	# K = -2 group starts here
6249	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6250	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6251	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6252	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6253	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6254	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6255	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6256	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6257	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6258	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6259	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6260	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6261	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6262	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6263	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6264	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000	# K = -1 group starts here
6265	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6266	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6267	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6268	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6269	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
6270	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6271	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6272	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6273	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6274	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6275	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6276	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6277	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6278	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6279	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6280	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000	# K = 0 group starts here
6281	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6282	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6283	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6284	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6285	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6286	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6287	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6288	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6289	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6290	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6291	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6292	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6293	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6294	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6295	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6296	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000	# K = 1 group starts here
6297	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6298	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6299	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6300	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6301	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6302	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6303	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6304	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6305	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6306	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6307	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6308	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6309	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6310	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6311	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6312	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000	# K = 2 group starts here
6313	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6314	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6315	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6316	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6317	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6318	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6319	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6320	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6321	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6322	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6323	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6324	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6325	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6326	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6327	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6328	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000	# K = 3 group starts here
6329	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6330	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6331	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6332	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6333	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6334	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6335	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6336	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6337	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6338	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6339	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6340	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6341	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6342	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6343	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6344
6345	set		X,FP_SCR0		# X: extended-precision scratch slot (holds X, later F or X')
6346	set		XDCARE,X+2		# bytes 2-3 of the X slot (not referenced in the visible atan code)
6347	set		XFRAC,X+4		# upper 32 mantissa bits of the X slot
6348	set		XFRACLO,X+8		# lower 32 mantissa bits of the X slot
6349
6350	set		ATANF,FP_SCR1		# ATANF: scratch slot for SIGN(F)*ATAN(|F|)
6351	set		ATANFHI,ATANF+4		# upper 32 mantissa bits of ATANF
6352	set		ATANFLO,ATANF+8		# lower 32 mantissa bits of ATANF
6353
6354	global		satan
6355#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6356satan:
6357	fmov.x		(%a0),%fp0		# LOAD INPUT
6358
6359	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent word of X
6360	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits of X
6361	fmov.x		%fp0,X(%a6)		# keep a copy of X in the X scratch slot
6362	and.l		&0x7FFFFFFF,%d1		# clear the sign: d1 = |X| in compact form
6363
6364	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
6365	bge.b		ATANOK1			# yes: go check the upper bound
6366	bra.w		ATANSM			# no: small-argument path
6367
6368ATANOK1:
6369	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
6370	ble.b		ATANMAIN		# yes: table-driven main path
6371	bra.w		ATANBIG			# no: large-argument path
6372
6373#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6374#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6375#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6376#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6377#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6378#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6379#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6380#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
6381#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6382#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6383#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6384#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6385#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6386
6387#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6388#--WE CHOSE F TO BE +-2^K * 1.BBBB1
6389#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6390#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6391#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6392#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6393
6394ATANMAIN:
6395
6396	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
6397	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
6398	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6399
6400	fmov.x		%fp0,%fp1		# FP1 IS X
6401	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
6402	fsub.x		X(%a6),%fp0		# FP0 IS X-F
6403	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
6404	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)
6405
6406#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6407#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6408#--SAVE REGISTERS FP2.
6409
6410	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
6411	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
6412	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
6413	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
6414	sub.l		&0x3FFB0000,%d2		# K+4
6415	asr.l		&1,%d2			# K+4 now sits at bit 15, directly above the 4 fraction bits
6416	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
6417	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
6418	lea		ATANTBL(%pc),%a1	# a1 = start of the table
6419	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
6420	mov.l		(%a1)+,ATANF(%a6)	# copy the first three longs of the table entry
6421	mov.l		(%a1)+,ATANFHI(%a6)
6422	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
6423	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
6424	and.l		&0x80000000,%d1		# SIGN(F)
6425	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
6426	mov.l		(%sp)+,%d2		# RESTORE d2
6427
6428#--THAT'S ALL I HAVE TO DO FOR NOW,
6429#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6430
6431#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6432#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6433#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6434#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6435#--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
6436#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6437#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6438
6439	fmovm.x		&0x04,-(%sp)		# save fp2
6440
6441	fmov.x		%fp0,%fp1		# FP1 = U
6442	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
6443	fmov.d		ATANA3(%pc),%fp2	# FP2 = A3
6444	fadd.x		%fp1,%fp2		# A3+V
6445	fmul.x		%fp1,%fp2		# V*(A3+V)
6446	fmul.x		%fp0,%fp1		# U*V
6447	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
6448	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
6449	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
6450	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED
6451
6452	fmovm.x 	(%sp)+,&0x20		# restore fp2
6453
6454	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6455	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
6456	bra		t_inx2			# finish in t_inx2 (defined outside this chunk)
6457
6458ATANBORS:					# NOTE(review): no branch to this label appears in this part of the file
6459#--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
6460#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6461	cmp.l		%d1,&0x3FFF8000		# compare |X| with 1.0 in compact form
6462	bgt.w		ATANBIG			# I.E. |X| >= 16
6463
6464ATANSM:
6465#--|X| <= 1/16
6466#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6467#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6468#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6469#--WHERE Y = X*X, AND Z = Y*Y.
6470
6471	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
6472	blt.w		ATANTINY		# yes: answer is X itself
6473
6474#--COMPUTE POLYNOMIAL
6475	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
6476
6477	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X
6478
6479	fmov.x		%fp0,%fp1		# FP1 = Y
6480	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
6481
6482	fmov.d		ATANB6(%pc),%fp2	# FP2 = B6
6483	fmov.d		ATANB5(%pc),%fp3	# FP3 = B5
6484
6485	fmul.x		%fp1,%fp2		# Z*B6
6486	fmul.x		%fp1,%fp3		# Z*B5
6487
6488	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
6489	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5
6490
6491	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
6492	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)
6493
6494	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
6495	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)
6496
6497	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
6498	fmul.x		X(%a6),%fp0		# X*Y
6499
6500	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6501
6502	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6503
6504	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
6505
6506	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6507	fadd.x		X(%a6),%fp0		# ATAN(X) = X + X*Y*(...)
6508	bra		t_inx2			# finish in t_inx2 (defined outside this chunk)
6509
6510ATANTINY:
6511#--|X| < 2^(-40), ATAN(X) = X
6512
6513	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6514	mov.b		&FMOV_OP,%d1		# last inst is MOVE
6515	fmov.x		X(%a6),%fp0		# last inst - possible exception set
6516
6517	bra		t_catch			# finish in t_catch (defined outside this chunk)
6518
6519ATANBIG:
6520#--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
6521#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6522	cmp.l		%d1,&0x40638000		# |X| > 2^(100) ?
6523	bgt.w		ATANHUGE		# yes
6524
6525#--APPROXIMATE ATAN(-1/X) BY
6526#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6527#--THIS CAN BE RE-WRITTEN AS
6528#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6529
6530	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
6531
6532	fmov.s		&0xBF800000,%fp1	# LOAD -1
6533	fdiv.x		%fp0,%fp1		# FP1 IS -1/X
6534
6535#--DIVIDE IS STILL CRANKING
6536
6537	fmov.x		%fp1,%fp0		# FP0 IS X'
6538	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
6539	fmov.x		%fp1,X(%a6)		# X IS REALLY X'
6540
6541	fmov.x		%fp0,%fp1		# FP1 = Y
6542	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
6543
6544	fmov.d		ATANC5(%pc),%fp3	# FP3 = C5
6545	fmov.d		ATANC4(%pc),%fp2	# FP2 = C4
6546
6547	fmul.x		%fp1,%fp3		# Z*C5
6548	fmul.x		%fp1,%fp2		# Z*C4
6549
6550	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
6551	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4
6552
6553	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
6554	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)
6555
6556	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
6557	fmul.x		X(%a6),%fp0		# X'*Y
6558
6559	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6560
6561	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
6562#					...	+[Y*(C2+Z*C4)])
6563	fadd.x		X(%a6),%fp0		# X' + X'*Y*(...) = ATAN(-1/X)
6564
6565	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
6566
6567	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6568	tst.b		(%a0)			# test the sign byte of the original input
6569	bpl.b		pos_big			# input is positive
6570
6571neg_big:
6572	fadd.x		NPIBY2(%pc),%fp0	# -PI/2 + ATAN(-1/X)
6573	bra		t_minx2			# finish in t_minx2 (defined outside this chunk)
6574
6575pos_big:
6576	fadd.x		PPIBY2(%pc),%fp0	# +PI/2 + ATAN(-1/X)
6577	bra		t_pinx2			# finish in t_pinx2 (defined outside this chunk)
6578
6579ATANHUGE:
6580#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6581	tst.b		(%a0)			# test the sign byte of the input
6582	bpl.b		pos_huge		# input is positive
6583
6584neg_huge:
6585	fmov.x		NPIBY2(%pc),%fp0	# fp0 = -PI/2
6586	fmov.l		%d0,%fpcr		# switch to the caller's rnd mode,prec before the final add
6587	fadd.x		PTINY(%pc),%fp0		# -PI/2 + TINY
6588	bra		t_minx2			# finish in t_minx2 (defined outside this chunk)
6589
6590pos_huge:
6591	fmov.x		PPIBY2(%pc),%fp0	# fp0 = +PI/2
6592	fmov.l		%d0,%fpcr		# switch to the caller's rnd mode,prec before the final add
6593	fadd.x		NTINY(%pc),%fp0		# +PI/2 - TINY
6594	bra		t_pinx2			# finish in t_pinx2 (defined outside this chunk)
6595
6596	global		satand
6597#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6598satand:
6599	bra		t_extdnrm		# all work is delegated to t_extdnrm (defined elsewhere)
6600
6601#########################################################################
6602# sasin():  computes the inverse sine of a normalized input		#
6603# sasind(): computes the inverse sine of a denormalized input		#
6604#									#
6605# INPUT ***************************************************************	#
6606#	a0 = pointer to extended precision input			#
6607#	d0 = round precision,mode					#
6608#									#
6609# OUTPUT **************************************************************	#
6610#	fp0 = arcsin(X)							#
6611#									#
6612# ACCURACY and MONOTONICITY *******************************************	#
6613#	The returned result is within 3 ulps in	64 significant bits,	#
6614#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6615#	rounded to double precision. The result is provably monotonic	#
6616#	in double precision.						#
6617#									#
6618# ALGORITHM ***********************************************************	#
6619#									#
6620#	ASIN								#
6621#	1. If |X| >= 1, go to 3.					#
6622#									#
6623#	2. (|X| < 1) Calculate asin(X) by				#
6624#		z := sqrt( [1-X][1+X] )					#
6625#		asin(X) = atan( x / z ).				#
6626#		Exit.							#
6627#									#
6628#	3. If |X| > 1, go to 5.						#
6629#									#
6630#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6631#									#
6632#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6633#		Exit.							#
6634#									#
6635#########################################################################
6636
6637	global		sasin
6638sasin:
6639	fmov.x		(%a0),%fp0		# LOAD INPUT
6640
6641	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent word of X
6642	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits of X
6643	and.l		&0x7FFFFFFF,%d1		# clear the sign: d1 = |X| in compact form
6644	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
6645	bge.b		ASINBIG			# yes
6646
6647# This catch is added here for the '060 QSP. Originally, the call to
6648# satan() would handle this case by causing the exception which would
6649# not be caught until gen_except(). Now, with the exceptions being
6650# detected inside of satan(), the exception would have been handled there
6651# instead of inside sasin() as expected.
6652	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
6653	blt.w		ASINTINY		# yes: answer is X itself
6654
6655#--THIS IS THE USUAL CASE, |X| < 1
6656#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6657
6658ASINMAIN:
6659	fmov.s		&0x3F800000,%fp1	# FP1 = 1
6660	fsub.x		%fp0,%fp1		# 1-X
6661	fmovm.x		&0x4,-(%sp)		# save fp2
6662	fmov.s		&0x3F800000,%fp2	# FP2 = 1
6663	fadd.x		%fp0,%fp2		# 1+X
6664	fmul.x		%fp2,%fp1		# (1+X)(1-X)
6665	fmovm.x		(%sp)+,&0x20		# restore fp2
6666	fsqrt.x		%fp1			# SQRT([1-X][1+X])
6667	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
6668	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
6669	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
6670	bsr		satan			# fp0 = ATAN(X/SQRT(...)); d0 is passed through unchanged
6671	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
6672	bra		t_inx2			# finish in t_inx2 (defined outside this chunk)
6673
6674ASINBIG:
6675	fabs.x		%fp0			# |X|
6676	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
6677	fbgt		t_operr			# cause an operr exception
6678
6679#--|X| = 1, ASIN(X) = +- PI/2.
6680ASINONE:
6681	fmov.x		PIBY2(%pc),%fp0		# fp0 = PIBY2 constant (defined outside this chunk)
6682	mov.l		(%a0),%d1		# d1 = first long of X (sign/exponent word on top)
6683	and.l		&0x80000000,%d1		# SIGN BIT OF X
6684	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
6685	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
6686	fmov.l		%d0,%fpcr		# switch to the caller's rnd mode,prec before the final multiply
6687	fmul.s		(%sp)+,%fp0		# fp0 = SIGN(X) * PIBY2
6688	bra		t_inx2			# finish in t_inx2 (defined outside this chunk)
6689
6690#--|X| < 2^(-40), ASIN(X) = X
6691ASINTINY:
6692	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
6693	mov.b		&FMOV_OP,%d1		# last inst is MOVE
6694	fmov.x		(%a0),%fp0		# last inst - possible exception
6695	bra		t_catch			# finish in t_catch (defined outside this chunk)
6696
6697	global		sasind
6698#--ASIN(X) = X FOR DENORMALIZED X
6699sasind:
6700	bra		t_extdnrm		# all work is delegated to t_extdnrm (defined elsewhere)
6701
6702#########################################################################
6703# sacos():  computes the inverse cosine of a normalized input		#
6704# sacosd(): computes the inverse cosine of a denormalized input		#
6705#									#
6706# INPUT ***************************************************************	#
6707#	a0 = pointer to extended precision input			#
6708#	d0 = round precision,mode					#
6709#									#
6710# OUTPUT ************************************************************** #
6711#	fp0 = arccos(X)							#
6712#									#
6713# ACCURACY and MONOTONICITY *******************************************	#
6714#	The returned result is within 3 ulps in	64 significant bits,	#
6715#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
6716#	rounded to double precision. The result is provably monotonic	#
6717#	in double precision.						#
6718#									#
6719# ALGORITHM *********************************************************** #
6720#									#
6721#	ACOS								#
6722#	1. If |X| >= 1, go to 3.					#
6723#									#
6724#	2. (|X| < 1) Calculate acos(X) by				#
6725#		z := (1-X) / (1+X)					#
6726#		acos(X) = 2 * atan( sqrt(z) ).				#
6727#		Exit.							#
6728#									#
6729#	3. If |X| > 1, go to 5.						#
6730#									#
6731#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
6732#									#
6733#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
6734#		Exit.							#
6735#									#
6736#########################################################################
6737
6738	global		sacos
6739sacos:
6740	fmov.x		(%a0),%fp0		# LOAD INPUT
6741
6742	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
6743	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits of X
6744	and.l		&0x7FFFFFFF,%d1		# clear the sign: d1 = |X| in compact form
6745	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
6746	bge.b		ACOSBIG			# yes
6747
6748#--THIS IS THE USUAL CASE, |X| < 1
6749#--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) ) )
6750
6751ACOSMAIN:
6752	fmov.s		&0x3F800000,%fp1	# FP1 = 1
6753	fadd.x		%fp0,%fp1		# 1+X
6754	fneg.x		%fp0			# -X
6755	fadd.s		&0x3F800000,%fp0	# 1-X
6756	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
6757	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
6758	mov.l		%d0,-(%sp)		# save original users fpcr
6759	clr.l		%d0			# satan is called with d0 = 0 as its rnd mode,prec argument
6760	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
6761	lea		(%sp),%a0		# pass ptr to sqrt
6762	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
6763	add.l		&0xc,%sp		# clear SQRT(...) from stack
6764
6765	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
6766	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
6767	bra		t_pinx2			# finish in t_pinx2 (defined outside this chunk)
6768
6769ACOSBIG:
6770	fabs.x		%fp0			# |X|
6771	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
6772	fbgt		t_operr			# cause an operr exception
6773
6774#--|X| = 1, ACOS(X) = 0 OR PI
6775	tst.b		(%a0)			# is X positive or negative?
6776	bpl.b		ACOSP1			# positive: X = +1
6777
6778#--X = -1
6779#Returns PI and inexact exception
6780ACOSM1:
6781	fmov.x		PI(%pc),%fp0		# load PI
6782	fmov.l		%d0,%fpcr		# load round mode,prec
6783	fadd.s		&0x00800000,%fp0	# add a small value
6784	bra		t_pinx2			# finish in t_pinx2 (defined outside this chunk)
6785
6786ACOSP1:
6787	bra		ld_pzero		# answer is positive zero
6788
6789	global		sacosd
6790#--ACOS(X) = PI/2 FOR DENORMALIZED X
6791sacosd:
6792	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
6793	fmov.x		PIBY2(%pc),%fp0		# fp0 = PIBY2 constant, loaded under the user's FPCR
6794	bra		t_pinx2			# finish in t_pinx2 (defined outside this chunk)
6795
6796#########################################################################
6797# setox():    computes the exponential for a normalized input		#
6798# setoxd():   computes the exponential for a denormalized input		#
6799# setoxm1():  computes the exponential minus 1 for a normalized input	#
6800# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
6801#									#
6802# INPUT	*************************************************************** #
6803#	a0 = pointer to extended precision input			#
6804#	d0 = round precision,mode					#
6805#									#
6806# OUTPUT ************************************************************** #
6807#	fp0 = exp(X) or exp(X)-1					#
6808#									#
6809# ACCURACY and MONOTONICITY ******************************************* #
6810#	The returned result is within 0.85 ulps in 64 significant bit, 	#
6811#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6812#	rounded to double precision. The result is provably monotonic 	#
6813#	in double precision.						#
6814#									#
6815# ALGORITHM and IMPLEMENTATION **************************************** #
6816#									#
6817#	setoxd								#
6818#	------								#
6819#	Step 1.	Set ans := 1.0						#
6820#									#
6821#	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
6822#	Notes:	This will always generate one exception -- inexact.	#
6823#									#
6824#									#
6825#	setox								#
6826#	-----								#
6827#									#
6828#	Step 1.	Filter out extreme cases of input argument.		#
6829#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
6830#		1.2	Go to Step 7.					#
6831#		1.3	If |X| < 16380 log(2), go to Step 2.		#
6832#		1.4	Go to Step 8.					#
6833#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
6834#		To avoid the use of floating-point comparisons, a	#
6835#		compact representation of |X| is used. This format is a	#
6836#		32-bit integer, the upper (more significant) 16 bits 	#
6837#		are the sign and biased exponent field of |X|; the 	#
6838#		lower 16 bits are the 16 most significant fraction	#
6839#		(including the explicit bit) bits of |X|. Consequently,	#
6840#		the comparisons in Steps 1.1 and 1.3 can be performed	#
6841#		by integer comparison. Note also that the constant	#
6842#		16380 log(2) used in Step 1.3 is also in the compact	#
6843#		form. Thus taking the branch to Step 2 guarantees 	#
6844#		|X| < 16380 log(2). There is no harm to have a small	#
6845#		number of cases where |X| is less than,	but close to,	#
6846#		16380 log(2) and the branch to Step 9 is taken.		#
6847#									#
6848#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
6849#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6850#			was taken)					#
6851#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
6852#		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
6853#			or 63.						#
6854#		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
6855#		2.5	Calculate the address of the stored value of 	#
6856#			2^(J/64).					#
6857#		2.6	Create the value Scale = 2^M.			#
6858#	Notes:	The calculation in 2.2 is really performed by		#
6859#			Z := X * constant				#
6860#			N := round-to-nearest-integer(Z)		#
6861#		where							#
6862#			constant := single-precision( 64/log 2 ).	#
6863#									#
6864#		Using a single-precision constant avoids memory 	#
6865#		access. Another effect of using a single-precision	#
6866#		"constant" is that the calculated value Z is 		#
6867#									#
6868#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
6869#									#
6870#		This error has to be considered later in Steps 3 and 4.	#
6871#									#
6872#	Step 3.	Calculate X - N*log2/64.				#
6873#		3.1	R := X + N*L1, 					#
6874#				where L1 := single-precision(-log2/64).	#
6875#		3.2	R := R + N*L2, 					#
6876#				L2 := extended-precision(-log2/64 - L1).#
6877#	Notes:	a) The way L1 and L2 are chosen ensures L1+L2 		#
6878#		approximate the value -log2/64 to 88 bits of accuracy.	#
6879#		b) N*L1 is exact because N is no longer than 22 bits	#
6880#		and L1 is no longer than 24 bits.			#
6881#		c) The calculation X+N*L1 is also exact due to 		#
6882#		cancellation. Thus, R is practically X+N(L1+L2) to full	#
6883#		64 bits. 						#
6884#		d) It is important to estimate how large can |R| be	#
6885#		after Step 3.2.						#
6886#									#
6887#		N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)	#
6888#		X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5	#
6889#		X*64/log2 - N	=	f - eps*X 64/log2		#
6890#		X - N*log2/64	=	f*log2/64 - eps*X		#
6891#									#
6892#									#
6893#		Now |X| <= 16446 log2, thus				#
6894#									#
6895#			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64	#
6896#					<= 0.57 log2/64.		#
6897#		 This bound will be used in Step 4.			#
6898#									#
6899#	Step 4.	Approximate exp(R)-1 by a polynomial			#
6900#		p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))	#
6901#	Notes:	a) In order to reduce memory access, the coefficients 	#
6902#		are made as "short" as possible: A1 (which is 1/2), A4	#
6903#		and A5 are single precision; A2 and A3 are double	#
6904#		precision. 						#
6905#		b) Even with the restrictions above, 			#
6906#		   |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.	#
6907#		Note that 0.0062 is slightly bigger than 0.57 log2/64.	#
6908#		c) To fully use the pipeline, p is separated into	#
6909#		two independent pieces of roughly equal complexities	#
6910#			p = [ R + R*S*(A2 + S*A4) ]	+		#
6911#				[ S*(A1 + S*(A3 + S*A5)) ]		#
6912#		where S = R*R.						#
6913#									#
6914#	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by		#
6915#				ans := T + ( T*p + t)			#
6916#		where T and t are the stored values for 2^(J/64).	#
6917#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
6918#		2^(J/64) to roughly 85 bits; T is in extended precision	#
6919#		and t is in single precision. Note also that T is 	#
6920#		rounded to 62 bits so that the last two bits of T are 	#
6921#		zero. The reason for such a special form is that T-1, 	#
6922#		T-2, and T-8 will all be exact --- a property that will	#
6923#		give much more accurate computation of the function 	#
6924#		EXPM1.							#
6925#									#
6926#	Step 6.	Reconstruction of exp(X)				#
6927#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
6928#		6.1	If AdjFlag = 0, go to 6.3			#
6929#		6.2	ans := ans * AdjScale				#
6930#		6.3	Restore the user FPCR				#
6931#		6.4	Return ans := ans * Scale. Exit.		#
6932#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
6933#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
6934#		neither overflow nor underflow. If AdjFlag = 1, that	#
6935#		means that						#
6936#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
6937#		Hence, exp(X) may overflow or underflow or neither.	#
6938#		When that is the case, AdjScale = 2^(M1) where M1 is	#
6939#		approximately M. Thus 6.2 will never cause 		#
6940#		over/underflow. Possible exception in 6.4 is overflow	#
6941#		or underflow. The inexact exception is not generated in	#
6942#		6.4. Although one can argue that the inexact flag	#
6943#		should always be raised, to simulate that exception 	#
6944#		costs more than the flag is worth in practical uses.	#
6945#									#
6946#	Step 7.	Return 1 + X.						#
6947#		7.1	ans := X					#
6948#		7.2	Restore user FPCR.				#
6949#		7.3	Return ans := 1 + ans. Exit			#
6950#	Notes:	For non-zero X, the inexact exception will always be	#
6951#		raised by 7.3. That is the only exception raised by 7.3.#
6952#		Note also that we use the FMOVEM instruction to move X	#
6953#		in Step 7.1 to avoid unnecessary trapping. (Although	#
6954#		the FMOVEM may not seem relevant since X is normalized,	#
6955#		the precaution will be useful in the library version of	#
6956#		this code where the separate entry for denormalized 	#
6957#		inputs will be done away with.)				#
6958#									#
6959#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
6960#		8.1	If |X| > 16480 log2, go to Step 9.		#
6961#		(mimic 2.2 - 2.6)					#
6962#		8.2	N := round-to-integer( X * 64/log2 )		#
6963#		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
6964#		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1, 	#
6965#			AdjFlag := 1.					#
6966#		8.5	Calculate the address of the stored value 	#
6967#			2^(J/64).					#
6968#		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
6969#		8.7	Go to Step 3.					#
6970#	Notes:	Refer to notes for 2.2 - 2.6.				#
6971#									#
6972#	Step 9.	Handle exp(X), |X| > 16480 log2.			#
6973#		9.1	If X < 0, go to 9.3				#
6974#		9.2	ans := Huge, go to 9.4				#
6975#		9.3	ans := Tiny.					#
6976#		9.4	Restore user FPCR.				#
6977#		9.5	Return ans := ans * ans. Exit.			#
6978#	Notes:	Exp(X) will surely overflow or underflow, depending on	#
6979#		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
6980#		extended-precision numbers whose square over/underflow	#
6981#		with an inexact result. Thus, 9.5 always raises the	#
6982#		inexact together with either overflow or underflow.	#
6983#									#
6984#	setoxm1d							#
6985#	--------							#
6986#									#
6987#	Step 1.	Set ans := 0						#
6988#									#
6989#	Step 2.	Return	ans := X + ans. Exit.				#
6990#	Notes:	This will return X with the appropriate rounding	#
6991#		 precision prescribed by the user FPCR.			#
6992#									#
6993#	setoxm1								#
6994#	-------								#
6995#									#
6996#	Step 1.	Check |X|						#
6997#		1.1	If |X| >= 1/4, go to Step 1.3.			#
6998#		1.2	Go to Step 7.					#
6999#		1.3	If |X| < 70 log(2), go to Step 2.		#
7000#		1.4	Go to Step 10.					#
7001#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
7002#		However, it is conceivable |X| can be small very often	#
7003#		because EXPM1 is intended to evaluate exp(X)-1 		#
7004#		accurately when |X| is small. For further details on 	#
7005#		the comparisons, see the notes on Step 1 of setox.	#
7006#									#
7007#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
7008#		2.1	N := round-to-nearest-integer( X * 64/log2 ).	#
7009#		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., #
7010#			or 63.						#
7011#		2.3	Calculate	M = (N - J)/64; so N = 64M + J.	#
7012#		2.4	Calculate the address of the stored value of 	#
7013#			2^(J/64).					#
7014#		2.5	Create the values Sc = 2^M and 			#
7015#			OnebySc := -2^(-M).				#
7016#	Notes:	See the notes on Step 2 of setox.			#
7017#									#
7018#	Step 3.	Calculate X - N*log2/64.				#
7019#		3.1	R := X + N*L1, 					#
7020#				where L1 := single-precision(-log2/64).	#
7021#		3.2	R := R + N*L2, 					#
7022#				L2 := extended-precision(-log2/64 - L1).#
7023#	Notes:	Applying the analysis of Step 3 of setox in this case	#
7024#		shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
7025#		this case).						#
7026#									#
7027#	Step 4.	Approximate exp(R)-1 by a polynomial			#
7028#			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))	#
7029#	Notes:	a) In order to reduce memory access, the coefficients 	#
7030#		are made as "short" as possible: A1 (which is 1/2), A5 	#
7031#		and A6 are single precision; A2, A3 and A4 are double 	#
7032#		precision. 						#
7033#		b) Even with the restriction above,			#
7034#			|p - (exp(R)-1)| <	|R| * 2^(-72.7)		#
7035#		for all |R| <= 0.0055.					#
7036#		c) To fully use the pipeline, p is separated into	#
7037#		two independent pieces of roughly equal complexity	#
7038#			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+	#
7039#				[ R + S*(A1 + S*(A3 + S*A5)) ]		#
7040#		where S = R*R.						#
7041#									#
7042#	Step 5.	Compute 2^(J/64)*p by					#
7043#				p := T*p				#
7044#		where T and t are the stored values for 2^(J/64).	#
7045#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
7046#		2^(J/64) to roughly 85 bits; T is in extended precision	#
7047#		and t is in single precision. Note also that T is 	#
7048#		rounded to 62 bits so that the last two bits of T are 	#
7049#		zero. The reason for such a special form is that T-1, 	#
7050#		T-2, and T-8 will all be exact --- a property that will	#
7051#		be exploited in Step 6 below. The total relative error	#
7052#		in p is no bigger than 2^(-67.7) compared to the final	#
7053#		result.							#
7054#									#
7055#	Step 6.	Reconstruction of exp(X)-1				#
7056#			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).	#
7057#		6.1	If M <= 63, go to Step 6.3.			#
7058#		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
7059#		6.3	If M >= -3, go to 6.5.				#
7060#		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
7061#		6.5	ans := (T + OnebySc) + (p + t).			#
7062#		6.6	Restore user FPCR.				#
7063#		6.7	Return ans := Sc * ans. Exit.			#
7064#	Notes:	The various arrangements of the expressions give 	#
7065#		accurate evaluations.					#
7066#									#
7067#	Step 7.	exp(X)-1 for |X| < 1/4.					#
7068#		7.1	If |X| >= 2^(-65), go to Step 9.		#
7069#		7.2	Go to Step 8.					#
7070#									#
7071#	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
7072#		8.1	If |X| < 2^(-16312), goto 8.3			#
7073#		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
7074#			Exit.						#
7075#		8.3	X := X * 2^(140).				#
7076#		8.4	Restore FPCR; ans := ans - 2^(-16382).		#
7077#		 Return ans := ans*2^(-140). Exit			#
7078#	Notes:	The idea is to return "X - tiny" under the user		#
7079#		precision and rounding modes. To avoid unnecessary	#
7080#		inefficiency, we stay away from denormalized numbers 	#
7081#		the best we can. For |X| >= 2^(-16312), the 		#
7082#		straightforward 8.2 generates the inexact exception as	#
7083#		the case warrants.					#
7084#									#
7085#	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
7086#			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
7087#	Notes:	a) In order to reduce memory access, the coefficients	#
7088#		are made as "short" as possible: B1 (which is 1/2), B9	#
7089#		to B12 are single precision; B3 to B8 are double 	#
7090#		precision; and B2 is double extended.			#
7091#		b) Even with the restriction above,			#
7092#			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
7093#		for all |X| <= 0.251.					#
7094#		Note that 0.251 is slightly bigger than 1/4.		#
7095#		c) To fully preserve accuracy, the polynomial is 	#
7096#		computed as						#
7097#			X + ( S*B1 +	Q ) where S = X*X and		#
7098#			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))	#
7099#		d) To fully use the pipeline, Q is separated into	#
7100#		two independent pieces of roughly equal complexity	#
7101#			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +	#
7102#				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]	#
7103#									#
7104#	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
7105#		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all 	#
7106#		practical purposes. Therefore, go to Step 1 of setox.	#
7107#		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
7108#		purposes. 						#
7109#		ans := -1 						#
7110#		Restore user FPCR					#
7111#		Return ans := ans + 2^(-126). Exit.			#
7112#	Notes:	10.2 will always create an inexact and return -1 + tiny	#
7113#		in the user rounding precision and mode.		#
7114#									#
7115#########################################################################
7116
#--Constants shared by setox (EXP) and setoxm1 (EXPM1).
#--L2 is the extended-precision low part of -log2/64. Step 3 of both
#--routines multiplies it by N with "fmul.x L2(%pc)"; the leading part L1
#--is a single-precision immediate in the code, and L1+L2 = -log2/64.
7117L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7118
#--setox Step 4 polynomial coefficients A3 and A2, read as doubles (fadd.d).
7119EEXPA3:	long		0x3FA55555,0x55554CC1
7120EEXPA2:	long		0x3FC55555,0x55554A54
7121
#--setoxm1 Step 4 polynomial coefficients A4 and A3, read as doubles (fadd.d).
7122EM1A4:	long		0x3F811111,0x11174385
7123EM1A3:	long		0x3FA55555,0x55554F5A
7124
#--setoxm1 Step 4 coefficient A2. It is read with fadd.d, so only the first
#--two longwords are used; the last two longwords are zero padding.
7125EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000
7126
#--setoxm1 Step 9 polynomial coefficients B8 down to B3, read as doubles
#--(fadd.d).
7127EM1B8:	long		0x3EC71DE3,0xA5774682
7128EM1B7:	long		0x3EFA01A0,0x19D7CB68
7129
7130EM1B6:	long		0x3F2A01A0,0x1A019DF3
7131EM1B5:	long		0x3F56C16C,0x16C170E2
7132
7133EM1B4:	long		0x3F811111,0x11111111
7134EM1B3:	long		0x3FA55555,0x55555555
7135
#--setoxm1 Step 9 coefficient B2 in extended precision (read with fadd.x),
#--followed by one zero longword of padding.
7136EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7137	long		0x00000000
7138
#--Doubles 2^(140) and 2^(-140), used by setoxm1 Step 8.3/8.4 (fmul.d) to
#--scale very small inputs up and the result back down.
7139TWO140:	long		0x48B00000,0x00000000
7140TWON140:
7141	long		0x37300000,0x00000000
7142
#--EEXPTBL: table of 2^(J/64) for J = 0,1,...,63, one 16-byte entry per J
#--(setox and setoxm1 index it with J shifted left by 4).
#--Per the algorithm notes each entry stores 2^(J/64) as T + t:
#--  longwords 0-2: T, extended precision (loaded with fmov.x / fadd.x)
#--  longword  3  : t, single-precision correction (loaded with fadd.s /
#--                 fmov.s at offset 12)
7143EEXPTBL:
7144	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
7145	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7146	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7147	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7148	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7149	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7150	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7151	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7152	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7153	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7154	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7155	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7156	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7157	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7158	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7159	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7160	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7161	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7162	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7163	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7164	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7165	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7166	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7167	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7168	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7169	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7170	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7171	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7172	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7173	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7174	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7175	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7176	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7177	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7178	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7179	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7180	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7181	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7182	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7183	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7184	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7185	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7186	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7187	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7188	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7189	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7190	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7191	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7192	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7193	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7194	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7195	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7196	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7197	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7198	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7199	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7200	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7201	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7202	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7203	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7204	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7205	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7206	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7207	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7208
#--Scratch-area aliases used by setox and setoxm1.
#--setox uses ADJFLAG, SCALE and ADJSCALE; setoxm1 uses SC and ONEBYSC.
#--SCALE and SC both name FP_SCR0, and ADJSCALE and ONEBYSC both name
#--FP_SCR1, so the two routines reuse the same two scratch slots.
7209	set		ADJFLAG,L_SCR2
7210	set		SCALE,FP_SCR0
7211	set		ADJSCALE,FP_SCR1
7212	set		SC,FP_SCR0
7213	set		ONEBYSC,FP_SCR1
7214
#--setox: EXP(X) for a normalized input.
#--In:  a0 = pointer to the extended-precision input X
#--     d0 = value loaded into the FPCR before the final operation
#--Out: fp0 = result; every path leaves through a branch to a t_* handler
#--     (t_catch, t_pinx2, t_unfl2, t_ovfl2 -- defined elsewhere).
#--This first part only classifies |X| and dispatches:
#--     |X| < 2^(-65)            -> EXPSM   (Step 7)
#--     2^(-65) <= |X| < 16380log2 -> EXPMAIN (Step 2)
#--     otherwise                -> EEXPBIG (Step 8)
7215	global		setox
7216setox:
7217#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7218
7219#--Step 1.
7220	mov.l		(%a0),%d1		# load part of input X
7221	and.l		&0x7FFF0000,%d1		# biased expo. of X, sign masked
7222	cmp.l		%d1,&0x3FBE0000		# biased expo. of 2^(-65)
7223	bge.b		EXPC1			# |X| >= 2^(-65): normal case
7224	bra		EXPSM			# tiny |X|: Step 7
7225
7226EXPC1:
7227#--The case |X| >= 2^(-65)
#--d1 becomes the "compact form" of |X|: biased exponent in the high word
#--(left there by Step 1) and the top 16 mantissa bits in the low word.
#--EM1BIG in setoxm1 also branches here, with the first longword of a
#--positive X in d1.
7228	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7229	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
7230	blt.b		EXPMAIN			# normal case
7231	bra		EEXPBIG			# |X| too large for Step 2: Step 8
7232
#--EXPMAIN: main path of setox (Steps 2 to 6).
#--Reduces X to R with X = (64*M + J)*log2/64 + R, evaluates a polynomial
#--for exp(R)-1, and rebuilds exp(X) = 2^M * 2^(J/64) * exp(R).
#--Register use below: d1 = integer scratch (N, J, M, biased exponent),
#--a1 = pointer into EEXPTBL, fp0-fp3 = FP scratch (fp2/fp3 are saved on the
#--stack and restored), d0 = FPCR value restored before the final multiply.
#--EXPCONT1 is also entered from EEXPBIG (Step 8) with ADJFLAG = 1.
7233EXPMAIN:
7234#--Step 2.
7235#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
7236	fmov.x		(%a0),%fp0		# load input from (a0)
7237
7238	fmov.x		%fp0,%fp1		# fp1 = X, kept for Step 3
7239	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7240	fmovm.x		&0xc,-(%sp)		# save fp2 and fp3 {%fp2/%fp3}
7241	mov.l		&0,ADJFLAG(%a6)		# AdjFlag := 0 (Step 2.1)
7242	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7243	lea		EEXPTBL(%pc),%a1	# a1 = base of 2^(J/64) table
7244	fmov.l		%d1,%fp0		# convert to floating-format
7245
7246	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7247	and.l		&0x3F,%d1		# d1 is J = N mod 64
7248	lsl.l		&4,%d1			# J * 16 = byte offset of entry J
7249	add.l		%d1,%a1			# address of 2^(J/64)
7250	mov.l		L_SCR1(%a6),%d1		# reload N
7251	asr.l		&6,%d1			# d1 is M
7252	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7253	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB
7254
7255EXPCONT1:
7256#--Step 3.
7257#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7258#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7259	fmov.x		%fp0,%fp2
7260	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7261	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7262	fadd.x		%fp1,%fp0		# X + N*L1
7263	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7264
7265#--Step 4.
7266#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7267#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7268#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7269#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
#--The integer moves that build SCALE are interleaved with the FP
#--instructions; they do not depend on the FP results.
7270
7271	fmov.x		%fp0,%fp1
7272	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7273
7274	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5
7275
7276	fmul.x		%fp1,%fp2		# fp2 IS S*A5
7277	fmov.x		%fp1,%fp3
7278	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4
7279
7280	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
7281	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4
7282
7283	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
7284	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
7285	mov.l		&0x80000000,SCALE+4(%a6)	# mantissa = 1.0 (high longword)
7286	clr.l		SCALE+8(%a6)		# mantissa low longword = 0
7287
7288	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)
7289
7290	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
7291	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)
7292
7293	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
7294	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),
7295
7296	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
7297	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1
7298
7299#--Step 5
7300#--final reconstruction process
7301#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
#--With T in fp1 and p in fp0 this computes ans := T + ( T*p + t ).
#--After the post-increment above, a1 points at t (the single-precision
#--fourth longword of the table entry).
7302
7303	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
7304	fmovm.x		(%sp)+,&0x30		# fp2 and fp3 restored {%fp2/%fp3}
7305	fadd.s		(%a1),%fp0		# accurate 2^(J/64)
7306
7307	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
7308	mov.l		ADJFLAG(%a6),%d1
7309
7310#--Step 6
#--ADJFLAG is 0 when coming from EXPMAIN and 1 when coming from EEXPBIG;
#--only in the latter case is the extra factor ADJSCALE = 2^(M1) applied.
7311	tst.l		%d1
7312	beq.b		NORMAL
7313ADJUST:
7314	fmul.x		ADJSCALE(%a6),%fp0
7315NORMAL:
7316	fmov.l		%d0,%fpcr		# restore user FPCR
#--d1 carries the opcode tag of the last instruction into t_catch
#--(presumably used there for exception reporting; t_catch is defined
#--elsewhere).
7317	mov.b		&FMUL_OP,%d1		# last inst is MUL
7318	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
7319	bra		t_catch
7320
#--EXPSM: setox for |X| < 2^(-65). Returns 1 + X, evaluated after the FPCR
#--has been set from d0. X is loaded with fmovm.x, which the algorithm
#--notes (Step 7) say is used to avoid unnecessary trapping.
7321EXPSM:
7322#--Step 7
7323	fmovm.x		(%a0),&0x80		# load X
7324	fmov.l		%d0,%fpcr		# restore user FPCR
7325	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
7326	bra		t_pinx2
7327
#--EEXPBIG: setox for |X| >= 16380 log2 (Step 8).
#--On entry d1 holds the compact form of |X| built at EXPC1.
#--If |X| exceeds the 16480 log2 threshold the result must over/underflow
#--and EXP2BIG handles it. Otherwise the exponent K = (N-J)/64 is split
#--into M1 = K/2 (arithmetic shift) and M = K - M1, ADJSCALE = 2^(M1) is
#--stored, ADJFLAG is set to 1, and the code rejoins the main path at
#--EXPCONT1 with d1 = biased exponent of 2^(M).
7328EEXPBIG:
7329#--Step 8
7330	cmp.l		%d1,&0x400CB27C		# 16480 log2
7331	bgt.b		EXP2BIG
7332#--Steps 8.2 -- 8.6
7333	fmov.x		(%a0),%fp0		# load input from (a0)
7334
7335	fmov.x		%fp0,%fp1		# fp1 = X, kept for Step 3
7336	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7337	fmovm.x		&0xc,-(%sp)		# save fp2 and fp3 {%fp2/%fp3}
7338	mov.l		&1,ADJFLAG(%a6)		# AdjFlag := 1 (Step 8.4)
7339	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7340	lea		EEXPTBL(%pc),%a1	# a1 = base of 2^(J/64) table
7341	fmov.l		%d1,%fp0		# convert to floating-format
7342	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7343	and.l		&0x3F,%d1		# d1 is J = N mod 64
7344	lsl.l		&4,%d1			# J * 16 = byte offset of entry J
7345	add.l		%d1,%a1			# address of 2^(J/64)
7346	mov.l		L_SCR1(%a6),%d1		# reload N
7347	asr.l		&6,%d1			# d1 is K
7348	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
7349	asr.l		&1,%d1			# d1 is M1
7350	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
7351	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
7352	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
7353	mov.l		&0x80000000,ADJSCALE+4(%a6)	# mantissa = 1.0
7354	clr.l		ADJSCALE+8(%a6)
7355	mov.l		L_SCR1(%a6),%d1		# d1 is M
7356	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
7357	bra.w		EXPCONT1		# go back to Step 3
7358
#--EXP2BIG: setox for |X| beyond the 16480 log2 threshold (Step 9).
#--The first byte of the operand holds the sign bit: a negative X is sent
#--to the underflow handler, a positive X to the overflow handler (both
#--handlers are defined elsewhere).
7359EXP2BIG:
7360#--Step 9
7361	tst.b		(%a0)			# is X positive or negative?
7362	bmi		t_unfl2			# X < 0: result underflows
7363	bra		t_ovfl2			# X > 0: result overflows
7364
#--setoxd: EXP(X) for a denormalized input.
#--In:  a0 = pointer to the extended-precision input X
#--     d0 = value loaded into the FPCR before the final add
#--Out: fp0 = 1 + sign(X)*2^(-126), computed under the restored FPCR;
#--     exits through t_pinx2 (defined elsewhere).
#--Only the sign of X is used: a single-precision value with that sign and
#--magnitude 2^(-126) is built on the stack and added to 1.
7365	global		setoxd
7366setoxd:
7367#--entry point for EXP(X), X is denormalized
7368	mov.l		(%a0),-(%sp)		# push first longword of X
7369	andi.l		&0x80000000,(%sp)	# keep only the sign bit
7370	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)
7371
7372	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
7373
7374	fmov.l		%d0,%fpcr		# restore user FPCR
7375	fadd.s		(%sp)+,%fp0		# 1 +/- 2^(-126); pops the temp
7376	bra		t_pinx2
7377
#--setoxm1: EXPM1(X) = exp(X)-1 for a normalized input.
#--In:  a0 = pointer to the extended-precision input X
#--     d0 = value loaded into the FPCR before the final operation
#--Out: fp0 = result; every path leaves through a branch to a t_* handler
#--     (defined elsewhere) or, for large positive X, into setox at EXPC1.
#--This first part only classifies |X| and dispatches:
#--     |X| < 1/4          -> EM1SM   (Step 7)
#--     1/4 <= |X| <= 70log2 -> EM1MAIN (Step 2)
#--     otherwise          -> EM1BIG  (Step 10)
7378	global		setoxm1
7379setoxm1:
7380#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7381
7382#--Step 1.
7383#--Step 1.1
7384	mov.l		(%a0),%d1		# load part of input X
7385	and.l		&0x7FFF0000,%d1		# biased expo. of X, sign masked
7386	cmp.l		%d1,&0x3FFD0000		# 1/4
7387	bge.b		EM1CON1			# |X| >= 1/4
7388	bra		EM1SM			# |X| < 1/4; d1 is reused there
7389
7390EM1CON1:
7391#--Step 1.3
7392#--The case |X| >= 1/4
#--d1 becomes the compact form of |X|: biased exponent in the high word,
#--top 16 mantissa bits in the low word.
7393	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
7394	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
7395	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
7396	bra		EM1BIG
7397
#--EM1MAIN: main path of setoxm1 (Steps 2 to 6) for 1/4 <= |X| <= 70 log2.
#--Reduces X to R with X = (64*M + J)*log2/64 + R, evaluates a polynomial p
#--for exp(R)-1, and rebuilds
#--     exp(X)-1 = 2^M * ( 2^(J/64) + 2^(J/64)*p - 2^(-M) ).
#--Register use below: d1 = integer scratch (N, J, M, biased exponents),
#--L_SCR1 = saved N, then saved M, a1 = pointer to the EEXPTBL entry,
#--fp0-fp3 = FP scratch (fp2/fp3 are saved on the stack and restored),
#--d0 = FPCR value restored before the final multiply.
7398EM1MAIN:
7399#--Step 2.
7400#--This is the case:	1/4 <= |X| <= 70 log2.
7401	fmov.x		(%a0),%fp0		# load input from (a0)
7402
7403	fmov.x		%fp0,%fp1		# fp1 = X, kept for Step 3
7404	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
7405	fmovm.x		&0xc,-(%sp)		# save fp2 and fp3 {%fp2/%fp3}
7406	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
7407	lea		EEXPTBL(%pc),%a1	# a1 = base of 2^(J/64) table
7408	fmov.l		%d1,%fp0		# convert to floating-format
7409
7410	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
7411	and.l		&0x3F,%d1		# d1 is J = N mod 64
7412	lsl.l		&4,%d1			# J * 16 = byte offset of entry J
7413	add.l		%d1,%a1			# address of 2^(J/64)
7414	mov.l		L_SCR1(%a6),%d1		# reload N
7415	asr.l		&6,%d1			# d1 is M
7416	mov.l		%d1,L_SCR1(%a6)		# save a copy of M
7417
7418#--Step 3.
7419#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7420#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7421	fmov.x		%fp0,%fp2
7422	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
7423	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
7424	fadd.x		%fp1,%fp0		# X + N*L1
7425	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
7426	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M
7427
7428#--Step 4.
7429#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7430#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7431#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7432#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
#--The integer moves that build SC = 2^(M) and ONEBYSC = -2^(-M) are
#--interleaved with the FP instructions; they do not depend on the FP
#--results.
7433
7434	fmov.x		%fp0,%fp1
7435	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
7436
7437	fmov.s		&0x3950097B,%fp2	# fp2 IS a6
7438
7439	fmul.x		%fp1,%fp2		# fp2 IS S*A6
7440	fmov.x		%fp1,%fp3
7441	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5
7442
7443	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
7444	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
7445	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
7446	mov.l		&0x80000000,SC+4(%a6)	# mantissa = 1.0 (high longword)
7447	clr.l		SC+8(%a6)		# mantissa low longword = 0
7448
7449	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
7450	mov.l		L_SCR1(%a6),%d1		# d1 is	M
7451	neg.w		%d1			# d1 is -M
7452	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
7453	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
7454	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
7455	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)
7456
7457	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
7458	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
7459	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
7460	mov.l		&0x80000000,ONEBYSC+4(%a6)	# mantissa = 1.0
7461	clr.l		ONEBYSC+8(%a6)
7462	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))
7463
7464	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
7465	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))
7466
7467	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1
7468
7469	fmovm.x		(%sp)+,&0x30		# fp2 and fp3 restored {%fp2/%fp3}
7470
7471#--Step 5
7472#--Compute 2^(J/64)*p
7473
7474	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)
7475
7476#--Step 6
#--From here fp0 is p = T*(exp(R)-1). The three cases below add T, t and
#--OnebySc to p in different orders depending on M; the algorithm notes
#--say the arrangements are chosen for accuracy.
7477#--Step 6.1
7478	mov.l		L_SCR1(%a6),%d1		# retrieve M
7479	cmp.l		%d1,&63
7480	ble.b		MLE63
7481#--Step 6.2	M >= 64
7482	fmov.s		12(%a1),%fp1		# fp1 is t
7483	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
7484	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
7485	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
7486	bra		EM1SCALE
7487MLE63:
7488#--Step 6.3	M <= 63
7489	cmp.l		%d1,&-3
7490	bge.b		MGEN3
7491MLTN3:
7492#--Step 6.4	M <= -4
7493	fadd.s		12(%a1),%fp0		# p+t
7494	fadd.x		(%a1),%fp0		# T+(p+t)
7495	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
7496	bra		EM1SCALE
7497MGEN3:
7498#--Step 6.5	-3 <= M <= 63
7499	fmov.x		(%a1)+,%fp1		# fp1 is T; a1 now points at t
7500	fadd.s		(%a1),%fp0		# fp0 is p+t
7501	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
7502	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)
7503
7504EM1SCALE:
7505#--Step 6.6
7506	fmov.l		%d0,%fpcr		# restore user FPCR
7507	fmul.x		SC(%a6),%fp0		# ans := Sc * ans (Step 6.7)
7508	bra		t_inx2
7509
#--EM1SM / EM1TINY / EM12TINY: setoxm1 for |X| < 1/4 (Steps 7 and 8).
#--On entry d1 still holds the sign-masked biased exponent of X in its high
#--word, as left by Step 1.1 of setoxm1.
#--  |X| >= 2^(-65): polynomial path, EM1POLY.
#--  otherwise     : return "X - tiny" under the restored FPCR, where tiny
#--                  is 2^(-16382). For the very smallest inputs X is first
#--                  scaled up by 2^(140) and the result scaled back by
#--                  2^(-140).
7510EM1SM:
7511#--Step 7	|X| < 1/4.
7512	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
7513	bge.b		EM1POLY
7514
7515EM1TINY:
7516#--Step 8	|X| < 2^(-65)
#--NOTE(review): the constant below is biased exponent 0x0033, and
#--0x0033 - 0x3FFF = -16332, i.e. 2^(-16332); the existing comments and the
#--algorithm header call this threshold 2^(-16312). Confirm which one is
#--intended.
7517	cmp.l		%d1,&0x00330000		# 2^(-16312)
7518	blt.b		EM12TINY
7519#--Step 8.2
7520	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7521	mov.l		&0x80000000,SC+4(%a6)	# mantissa = 1.0 (high longword)
7522	clr.l		SC+8(%a6)		# mantissa low longword = 0
7523	fmov.x		(%a0),%fp0		# fp0 = X
7524	fmov.l		%d0,%fpcr		# restore user FPCR
7525	mov.b		&FADD_OP,%d1		# last inst is ADD
7526	fadd.x		SC(%a6),%fp0		# X - 2^(-16382)
7527	bra		t_catch
7528
7529EM12TINY:
7530#--Step 8.3
#--The subtraction is done on the scaled value before the FPCR is
#--restored; only the final scale-down multiply runs under the user FPCR.
7531	fmov.x		(%a0),%fp0		# fp0 = X
7532	fmul.d		TWO140(%pc),%fp0	# X * 2^(140)
7533	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
7534	mov.l		&0x80000000,SC+4(%a6)
7535	clr.l		SC+8(%a6)
7536	fadd.x		SC(%a6),%fp0		# X*2^(140) - 2^(-16382)
7537	fmov.l		%d0,%fpcr		# restore user FPCR
7538	mov.b		&FMUL_OP,%d1		# last inst is MUL
7539	fmul.d		TWON140(%pc),%fp0	# scale back by 2^(-140)
7540	bra		t_catch
7541
#--EM1POLY: setoxm1 for 2^(-65) <= |X| < 1/4 (Step 9).
#--Evaluates p = X + ( S*B1 + Q ) with S = X*X and
#--     Q = X*S*(B2 + S*(B4 + ... + S*B12)) + S*S*(B3 + S*(B5 + ... + S*B11)).
#--fp0 holds S throughout, fp1 accumulates the even-index chain
#--(B12, B10, ..., B2) and fp2 the odd-index chain (B11, B9, ..., B3); the
#--two chains are interleaved instruction by instruction. fp2/fp3 are saved
#--on the stack and restored. X itself is re-read from (a0) when needed.
7542EM1POLY:
7543#--Step 9	exp(X)-1 by a simple polynomial
7544	fmov.x		(%a0),%fp0		# fp0 is X
7545	fmul.x		%fp0,%fp0		# fp0 is S := X*X
7546	fmovm.x		&0xc,-(%sp)		# save fp2 and fp3 {%fp2/%fp3}
7547	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
7548	fmul.x		%fp0,%fp1		# fp1 is S*B12
7549	fmov.s		&0x310F8290,%fp2	# fp2 is B11
7550	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12
7551
7552	fmul.x		%fp0,%fp2		# fp2 is S*B11
7553	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...
7554
7555	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
7556	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...
7557
7558	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
7559	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...
7560
7561	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
7562	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...
7563
7564	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
7565	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...
7566
7567	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
7568	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...
7569
7570	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
7571	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...
7572
7573	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
7574	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...
7575
7576	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
7577	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...
7578
7579	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
7580	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...
7581
7582	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
7583	fadd.x		%fp2,%fp1		# fp1 is Q
7584
7585	fmovm.x		(%sp)+,&0x30		# fp2 and fp3 restored {%fp2/%fp3}
7586
7587	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q
7588
7589	fmov.l		%d0,%fpcr		# restore user FPCR
7590	fadd.x		(%a0),%fp0		# X + (S*B1+Q), under user FPCR
7591	bra		t_inx2
7592
#--EM1BIG: setoxm1 for |X| > 70 log2 (Step 10).
#--X > 0: exp(X)-1 is treated as exp(X); control transfers into setox at
#--       EXPC1 with the first longword of X (sign bit clear, so the high
#--       word is the biased exponent) in d1.
#--X < 0: the result is -1 + 2^(-126), computed in single-precision
#--       operands after the FPCR has been restored from d0; exits through
#--       t_minx2 (defined elsewhere).
7593EM1BIG:
7594#--Step 10	|X| > 70 log2
7595	mov.l		(%a0),%d1		# sign/exponent longword of X
7596	cmp.l		%d1,&0			# sign test on that longword
7597	bgt.w		EXPC1			# X > 0: continue in setox
7598#--Step 10.2
7599	fmov.s		&0xBF800000,%fp0	# fp0 is -1
7600	fmov.l		%d0,%fpcr		# restore user FPCR
7601	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
7602	bra		t_minx2
7603
#--setoxm1d: EXPM1(X) for a denormalized input.
#--The routine does no work of its own: it branches straight to t_extdnrm
#--(defined elsewhere), which is expected to produce the result. The
#--algorithm header describes this case as returning X rounded to the user
#--precision.
7604	global		setoxm1d
7605setoxm1d:
7606#--entry point for EXPM1(X), here X is denormalized
7607#--Step 0.
7608	bra		t_extdnrm
7609
7610#########################################################################
7611# sgetexp():  returns the exponent portion of the input argument.	#
7612#	      The exponent bias is removed and the exponent value is	#
7613#	      returned as an extended precision number in fp0.		#
7614# sgetexpd(): handles denormalized numbers. 				#
7615#									#
7616# sgetman():  extracts the mantissa of the input argument. The 		#
7617#	      mantissa is converted to an extended precision number w/ 	#
7618#	      an exponent of $3fff and is returned in fp0. The range of #
7619#	      the result is [1.0 - 2.0).				#
7620# sgetmand(): handles denormalized numbers.				#
7621#									#
7622# INPUT *************************************************************** #
7623#	a0  = pointer to extended precision input			#
7624#									#
7625# OUTPUT ************************************************************** #
7626#	fp0 = exponent(X) or mantissa(X)				#
7627#									#
7628#########################################################################
7629
#--sgetexp: return the unbiased exponent of X as a value in fp0.
#--In:  a0 = pointer to the extended-precision input
#--Out: fp0 = exponent(X) - $3fff, converted from a 16-bit integer;
#--     FPSR_CC(%a6) is set to neg_bmask when that value is negative.
#--Uses d0 as scratch.
7630	global		sgetexp
7631sgetexp:
7632	mov.w		SRC_EX(%a0),%d0		# get the exponent
7633	bclr		&0xf,%d0		# clear the sign bit
7634	subi.w		&0x3fff,%d0		# subtract off the bias
7635	fmov.w		%d0,%fp0		# return exp in fp0
#--The blt below tests the condition codes left by the subi.w; the fmov.w
#--to an FP register in between does not alter the integer condition codes.
7636	blt.b		sgetexpn		# it's negative
7637	rts
7638
7639sgetexpn:
7640	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7641	rts
7642
#--sgetexpd: sgetexp for a denormalized input.
#--Calls norm (defined elsewhere) and, per the comment on the neg.w below,
#--treats the value norm leaves in d0 as the normalization shift count.
#--The returned exponent is -(shift count) - $3fff, so the 'N' condition
#--code bit is set unconditionally.
#--In:  a0 = pointer to the extended-precision input (presumably consumed
#--     by norm -- confirm against norm's interface)
#--Out: fp0 = exponent as a value converted from a 16-bit integer
7643	global		sgetexpd
7644sgetexpd:
7645	bsr.l		norm			# normalize
7646	neg.w		%d0			# new exp = -(shft amt)
7647	subi.w		&0x3fff,%d0		# subtract off the bias
7648	fmov.w		%d0,%fp0		# return exp in fp0
7649	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7650	rts
7651
#--sgetman: return the mantissa of X as an extended-precision value whose
#--biased exponent is $3fff, keeping the sign of X.
#--In:  a0 = pointer to the extended-precision input (not modified)
#--Out: fp0 = result; FPSR_CC(%a6) is set to neg_bmask when X is negative.
#--Uses d0 and FP_SCR0(%a6) as scratch. sgetmand also branches here.
7652	global		sgetman
7653sgetman:
7654	mov.w		SRC_EX(%a0),%d0		# get the sign/exponent word
7655	ori.w		&0x7fff,%d0		# set all exp bits, keep the sign
7656	bclr		&0xe,%d0		# make it the new exp +-3fff
7657
7658# here, we build the result in a tmp location so as not to disturb the input
7659	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7660	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7661	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7662	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
#--The bmi below tests the N bit left by the mov.w of d0 above (bit 15 of
#--d0 is the sign of X); the fmov.x in between does not alter the integer
#--condition codes.
7663	bmi.b		sgetmann		# it's negative
7664	rts
7665
7666sgetmann:
7667	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
7668	rts
7669
7670#
7671# For denormalized numbers, shift the mantissa until the j-bit = 1,
7672# then load the exponent with +/- $3fff.
7673#
# sgetmand calls norm (defined elsewhere) to do the shift and then reuses
# sgetman for the exponent replacement and the sign handling. This relies
# on a0 still pointing at the operand after norm returns (presumably norm
# normalizes it in place -- confirm against norm's interface).
7674	global		sgetmand
7675sgetmand:
7676	bsr.l		norm			# normalize exponent
7677	bra.b		sgetman
7678
7679#########################################################################
7680# scosh():  computes the hyperbolic cosine of a normalized input	#
7681# scoshd(): computes the hyperbolic cosine of a denormalized input	#
7682#									#
7683# INPUT ***************************************************************	#
7684#	a0 = pointer to extended precision input			#
7685#	d0 = round precision,mode					#
7686#									#
7687# OUTPUT **************************************************************	#
7688#	fp0 = cosh(X)							#
7689#									#
7690# ACCURACY and MONOTONICITY *******************************************	#
7691#	The returned result is within 3 ulps in 64 significant bit, 	#
7692#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
7693#	rounded to double precision. The result is provably monotonic 	#
7694#	in double precision.						#
7695#									#
7696# ALGORITHM ***********************************************************	#
7697#									#
7698#	COSH								#
7699#	1. If |X| > 16380 log2, go to 3.				#
7700#									#
7701#	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
7702#		y = |X|, z = exp(Y), and				#
7703#		cosh(X) = (1/2)*( z + 1/z ).				#
7704#		Exit.							#
7705#									#
7706#	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
7707#									#
7708#	4. (16380 log2 < |X| <= 16480 log2)				#
7709#		cosh(X) = exp(|X|)/2.					#
7710#		However, invoking exp(|X|) may cause premature 		#
7711#		overflow. Thus, we calculate cosh(X) as follows:	#
7712#		Y	:= |X|						#
7713#		Fact	:=	2**(16380)				#
7714#		Y'	:= Y - 16381 log2				#
7715#		cosh(X) := Fact * exp(Y').				#
7716#		Exit.							#
7717#									#
7718#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7719#		Huge*Huge to generate overflow and a positive infinity	#
7720#		(cosh is even, so the sign of X does not matter). Huge	#
7721#		is the largest finite number in extended format. Exit.	#
7722#									#
7723#########################################################################
7724
#--TWO16380: extended-precision 2^(16380) (biased exponent 0x7FFB, mantissa
#--1.0). scosh multiplies by it in COSHBIG to rebuild
#--exp(|X|)/2 = 2^(16380) * exp(|X| - 16381 log2).
7725TWO16380:
7726	long		0x7FFB0000,0x80000000,0x00000000,0x00000000
7727
#--scosh: hyperbolic cosine of a normalized input.
#--In:  a0 = pointer to the extended-precision input X
#--     d0 = round precision,mode (loaded into the FPCR before the final
#--          operation)
#--Out: fp0 = cosh(X); exits through t_catch or t_ovfl2 (defined elsewhere).
#--Three ranges, selected on the compact form of |X| in d1 (biased exponent
#--in the high word, top 16 mantissa bits in the low word):
#--  usual case : cosh(X) = (1/2)exp(|X|) + 1/(2 exp(|X|))
#--  COSHBIG    : cosh(X) = 2^(16380) * exp(|X| - 16381 log2)
#--  COSHHUGE   : overflow
#--setox is called with d0 = 0 and a pointer to a temporary copy of the
#--argument on the stack; the caller's d0 is saved and restored around the
#--call. fp1 is used as scratch in the usual case.
7728	global		scosh
7729scosh:
7730	fmov.x		(%a0),%fp0		# LOAD INPUT
7731
7732	mov.l		(%a0),%d1		# sign/exponent into high word
7733	mov.w		4(%a0),%d1		# top 16 mantissa bits, low word
7734	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
7735	cmp.l		%d1,&0x400CB167		# 16380 log2, compact form
7736	bgt.b		COSHBIG
7737
7738#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7739#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7740
7741	fabs.x		%fp0			# |X|
7742
7743	mov.l		%d0,-(%sp)		# save caller's d0
7744	clr.l		%d0			# d0 = 0 for the setox call
7745	fmovm.x		&0x01,-(%sp)		# save |X| to stack
7746	lea		(%sp),%a0		# pass ptr to |X|
7747	bsr		setox			# FP0 IS EXP(|X|)
7748	add.l		&0xc,%sp		# erase |X| from stack
7749	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
7750	mov.l		(%sp)+,%d0		# restore caller's d0
7751
7752	fmov.s		&0x3E800000,%fp1	# (1/4)
7753	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))
7754
7755	fmov.l		%d0,%fpcr		# restore user FPCR
7756	mov.b		&FADD_OP,%d1		# last inst is ADD
7757	fadd.x		%fp1,%fp0		# (1/2)EXP(|X|) + 1/(2 EXP(|X|))
7758	bra		t_catch
7759
7760COSHBIG:
#--d1 still holds the compact form of |X|. T1 and T2 are defined elsewhere
#--in the file; per the comments below they are the leading and trailing
#--parts of 16381 log2.
7761	cmp.l		%d1,&0x400CB2B3		# upper limit before overflow
7762	bgt.b		COSHHUGE
7763
7764	fabs.x		%fp0			# |X|
7765	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
7766	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
7767
7768	mov.l		%d0,-(%sp)		# save caller's d0
7769	clr.l		%d0			# d0 = 0 for the setox call
7770	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
7771	lea		(%sp),%a0		# pass ptr to fp0
7772	bsr		setox			# fp0 = exp(|X| - 16381 log2)
7773	add.l		&0xc,%sp		# clear fp0 from stack
7774	mov.l		(%sp)+,%d0		# restore caller's d0
7775
7776	fmov.l		%d0,%fpcr		# restore user FPCR
7777	mov.b		&FMUL_OP,%d1		# last inst is MUL
7778	fmul.x		TWO16380(%pc),%fp0	# times 2^(16380)
7779	bra		t_catch
7780
7781COSHHUGE:
7782	bra		t_ovfl2			# result must overflow
7783
7784	global		scoshd
7785#--COSH(X) = 1 FOR DENORMALIZED X
#--The result is formed as 1.0 + 2^(-126) (0x00800000 is the smallest
#--normalized single) under the caller's rounding precision/mode from d0,
#--then control passes to t_pinx2 (defined outside this section).
7786scoshd:
7787	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
7788
7789	fmov.l		%d0,%fpcr		# install caller's rnd prec,mode
7790	fadd.s		&0x00800000,%fp0	# 1.0 + 2^(-126)
7791	bra		t_pinx2
7792
7793#########################################################################
7794# ssinh():  computes the hyperbolic sine of a normalized input		#
7795# ssinhd(): computes the hyperbolic sine of a denormalized input	#
7796#									#
7797# INPUT *************************************************************** #
7798#	a0 = pointer to extended precision input			#
7799#	d0 = round precision,mode					#
7800#									#
7801# OUTPUT ************************************************************** #
7802#	fp0 = sinh(X)							#
7803#									#
7804# ACCURACY and MONOTONICITY *******************************************	#
7805#	The returned result is within 3 ulps in 64 significant bits,	#
7806#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7807#	rounded to double precision. The result is provably monotonic	#
7808#	in double precision.						#
7809#									#
7810# ALGORITHM *********************************************************** #
7811#									#
7812#       SINH								#
7813#       1. If |X| > 16380 log2, go to 3.				#
7814#									#
7815#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula	#
7816#               y = |X|, sgn = sign(X), and z = expm1(Y),		#
7817#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).			#
7818#          Exit.							#
7819#									#
7820#       3. If |X| > 16480 log2, go to 5.				#
7821#									#
7822#       4. (16380 log2 < |X| <= 16480 log2)				#
7823#               sinh(X) = sign(X) * exp(|X|)/2.				#
7824#          However, invoking exp(|X|) may cause premature overflow.	#
7825#          Thus, we calculate sinh(X) as follows:			#
7826#             Y       := |X|						#
7827#             sgn     := sign(X)					#
7828#             sgnFact := sgn * 2**(16380)				#
7829#             Y'      := Y - 16381 log2					#
7830#             sinh(X) := sgnFact * exp(Y').				#
7831#          Exit.							#
7832#									#
7833#       5. (|X| > 16480 log2) sinh(X) must overflow. Return		#
7834#          sign(X)*Huge*Huge to generate overflow and an infinity with	#
7835#          the appropriate sign. Huge is the largest finite number in	#
7836#          extended format. Exit.					#
7837#									#
7838#########################################################################
7839
7840	global		ssinh
#--ssinh: a0 -> extended-precision X, d0 = caller's rounding precision/mode
#--(see the routine header). The result is built in fp0; a1 carries the
#--compact operand so the sign of X can be re-applied at the end.
7841ssinh:
7842	fmov.x		(%a0),%fp0		# LOAD INPUT
7843
7844	mov.l		(%a0),%d1
7845	mov.w		4(%a0),%d1		# d1 = sign/exponent : top 16 mantissa bits
7846	mov.l		%d1,%a1			# save (compacted) operand
7847	and.l		&0x7FFFFFFF,%d1		# drop the sign bit: compact |X|
7848	cmp.l		%d1,&0x400CB167		# |X| > 16380 log2 ?
7849	bgt.b		SINHBIG
7850
7851#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7852#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7853
7854	fabs.x		%fp0			# Y = |X|
7855
7856	movm.l		&0x8040,-(%sp)		# {a1/d0}
7857	fmovm.x		&0x01,-(%sp)		# save Y on stack
7858	lea		(%sp),%a0		# pass ptr to Y
7859	clr.l		%d0			# setoxm1 is entered with d0 = 0
7860	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
7861	add.l		&0xc,%sp		# clear Y from stack
7862	fmov.l		&0,%fpcr		# zero the FPCR for the intermediate steps
7863	movm.l		(%sp)+,&0x0201		# {a1/d0}
7864
7865	fmov.x		%fp0,%fp1
7866	fadd.s		&0x3F800000,%fp1	# 1+Z
7867	fmov.x		%fp0,-(%sp)		# park Z on the stack
7868	fdiv.x		%fp1,%fp0		# Z/(1+Z)
7869	mov.l		%a1,%d1
7870	and.l		&0x80000000,%d1		# sign bit of X
7871	or.l		&0x3F000000,%d1		# d1 = SIGN(X)*(1/2) in sgl fmt
7872	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z)
7873	mov.l		%d1,-(%sp)		# operand for the final multiply
7874
7875	fmov.l		%d0,%fpcr		# final multiply runs under caller's rnd prec,mode
7876	mov.b		&FMUL_OP,%d1		# last inst is MUL
7877	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
7878	bra		t_catch
7879
#--16380 log2 < |X|: sinh(X) = sgnFact * exp(|X| - 16381 log2) with
#--sgnFact = SIGN(X) * 2^16380 assembled on the stack as an extended value.
7880SINHBIG:
7881	cmp.l		%d1,&0x400CB2B3		# beyond the upper bound?
7882	bgt		t_ovfl			# yes: result must overflow
7883	fabs.x		%fp0
7884	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
7885	mov.l		&0,-(%sp)		# sgnFact: low mantissa longword
7886	mov.l		&0x80000000,-(%sp)	# sgnFact: high mantissa longword (1.0)
7887	mov.l		%a1,%d1
7888	and.l		&0x80000000,%d1		# sign bit of X
7889	or.l		&0x7FFB0000,%d1		# sign + biased exponent of 2^16380
7890	mov.l		%d1,-(%sp)		# EXTENDED FMT
7891	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
7892
7893	mov.l		%d0,-(%sp)		# keep caller's rnd prec,mode across the call
7894	clr.l		%d0			# setox is entered with d0 = 0
7895	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
7896	lea		(%sp),%a0		# pass ptr to fp0
7897	bsr		setox			# fp0 = exp(|X| - 16381 log2)
7898	add.l		&0xc,%sp		# clear fp0 from stack
7899
7900	mov.l		(%sp)+,%d0		# recover caller's rnd prec,mode; sgnFact is now on top
7901	fmov.l		%d0,%fpcr		# final multiply runs under caller's rnd prec,mode
7902	mov.b		&FMUL_OP,%d1		# last inst is MUL
7903	fmul.x		(%sp)+,%fp0		# possible exception
7904	bra		t_catch
7905
7906	global		ssinhd
7907#--SINH(X) = X FOR DENORMALIZED X
#--Delegates entirely to t_extdnrm, which is defined outside this section.
7908ssinhd:
7909	bra		t_extdnrm
7910
7911#########################################################################
7912# stanh():  computes the hyperbolic tangent of a normalized input	#
7913# stanhd(): computes the hyperbolic tangent of a denormalized input	#
7914#									#
7915# INPUT ***************************************************************	#
7916#	a0 = pointer to extended precision input			#
7917#	d0 = round precision,mode					#
7918#									#
7919# OUTPUT **************************************************************	#
7920#	fp0 = tanh(X)							#
7921#									#
7922# ACCURACY and MONOTONICITY *******************************************	#
7923#	The returned result is within 3 ulps in 64 significant bits,	#
7924#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7925#	rounded to double precision. The result is provably monotonic	#
7926#	in double precision.						#
7927#									#
7928# ALGORITHM ***********************************************************	#
7929#									#
7930#	TANH								#
7931#	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.		#
7932#									#
7933#	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by		#
7934#		sgn := sign(X), y := 2|X|, z := expm1(Y), and		#
7935#		tanh(X) = sgn*( z/(2+z) ).				#
7936#		Exit.							#
7937#									#
7938#	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
7939#		go to 7.						#
7940#									#
7941#	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.		#
7942#									#
7943#	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by		#
7944#		sgn := sign(X), y := 2|X|, z := exp(Y),			#
7945#		tanh(X) = sgn - [ sgn*2/(1+z) ].			#
7946#		Exit.							#
7947#									#
7948#	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we	#
7949#		calculate Tanh(X) by					#
7950#		sgn := sign(X), Tiny := 2**(-126),			#
7951#		tanh(X) := sgn - sgn*Tiny.				#
7952#		Exit.							#
7953#									#
7954#	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.			#
7955#									#
7956#########################################################################
7957
#--Scratch-frame aliases used by stanh. X and V name the same slot
#--(FP_SCR0); SGN is a separate longword (L_SCR3).
7958	set		X,FP_SCR0
7959	set		XFRAC,X+4
7960
7961	set		SGN,L_SCR3
7962
7963	set		V,FP_SCR0
7964
7965	global		stanh
#--stanh: a0 -> extended-precision X, d0 = caller's rounding precision/mode
#--(see the routine header). The result is built in fp0.
7966stanh:
7967	fmov.x		(%a0),%fp0		# LOAD INPUT
7968
7969	fmov.x		%fp0,X(%a6)		# working copy of X in the scratch slot
7970	mov.l		(%a0),%d1
7971	mov.w		4(%a0),%d1		# d1 = sign/exponent : top 16 mantissa bits
7972	mov.l		%d1,X(%a6)		# first longword of X now holds the compact form
7973	and.l		&0x7FFFFFFF,%d1		# drop the sign bit: compact |X|
7974	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
7975	blt.w		TANHBORS		# yes
7976	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
7977	bgt.w		TANHBORS		# yes
7978
7979#--THIS IS THE USUAL CASE
7980#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7981
7982	mov.l		X(%a6),%d1
7983	mov.l		%d1,SGN(%a6)
7984	and.l		&0x7FFF0000,%d1		# keep only the biased exponent (sign cleared)
7985	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
7986	mov.l		%d1,X(%a6)		# X slot = 2|X| (mantissa longwords untouched)
7987	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X only
7988	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|
7989
7990	mov.l		%d0,-(%sp)		# keep caller's rnd prec,mode across the call
7991	clr.l		%d0			# setoxm1 is entered with d0 = 0
7992	fmovm.x		&0x1,-(%sp)		# save Y on stack
7993	lea		(%sp),%a0		# pass ptr to Y
7994	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
7995	add.l		&0xc,%sp		# clear Y from stack
7996	mov.l		(%sp)+,%d0		# recover caller's rnd prec,mode
7997
7998	fmov.x		%fp0,%fp1
7999	fadd.s		&0x40000000,%fp1	# Z+2
8000	mov.l		SGN(%a6),%d1
8001	fmov.x		%fp1,V(%a6)		# V = Z+2 in memory
8002	eor.l		%d1,V(%a6)		# V = SIGN(X)*(Z+2): flip V's sign bit if X < 0
8003
8004	fmov.l		%d0,%fpcr		# restore users round prec,mode
8005	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
8006	bra		t_inx2
8007
#--|X| is either tiny (< 2^(-40)) or large (> (5/2) log2); 1.0 separates
#--the two groups.
8008TANHBORS:
8009	cmp.l		%d1,&0x3FFF8000		# |X| < 1 ?
8010	blt.w		TANHSM			# yes: tiny input
8011
8012	cmp.l		%d1,&0x40048AA1		# |X| > 50 log2 ?
8013	bgt.w		TANHHUGE		# yes: result is +-1 after rounding
8014
8015#-- (5/2) LOG2 < |X| < 50 LOG2,
8016#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8017#--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
8018
8019	mov.l		X(%a6),%d1
8020	mov.l		%d1,SGN(%a6)
8021	and.l		&0x7FFF0000,%d1		# keep only the biased exponent (sign cleared)
8022	add.l		&0x00010000,%d1		# EXPO OF 2|X|
8023	mov.l		%d1,X(%a6)		# Y = 2|X|
8024	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X only
8025	mov.l		SGN(%a6),%d1
8026	fmov.x		X(%a6),%fp0		# Y = 2|X|
8027
8028	mov.l		%d0,-(%sp)		# keep caller's rnd prec,mode across the call
8029	clr.l		%d0			# setox is entered with d0 = 0
8030	fmovm.x		&0x01,-(%sp)		# save Y on stack
8031	lea		(%sp),%a0		# pass ptr to Y
8032	bsr		setox			# FP0 IS EXP(Y)
8033	add.l		&0xc,%sp		# clear Y from stack
8034	mov.l		(%sp)+,%d0		# recover caller's rnd prec,mode
8035	mov.l		SGN(%a6),%d1		# reload the sign bit after the call
8036	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1
8037
8038	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
8039	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
8040	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]
8041
8042	mov.l		SGN(%a6),%d1
8043	or.l		&0x3F800000,%d1		# SGN
8044	fmov.s		%d1,%fp0		# SGN IN SGL FMT
8045
8046	fmov.l		%d0,%fpcr		# restore users round prec,mode
8047	mov.b		&FADD_OP,%d1		# last inst is ADD
8048	fadd.x		%fp1,%fp0		# SGN - SGN*2/[EXP(Y)+1]
8049	bra		t_inx2
8050
#--Tiny input: tanh(X) = X. The move is done under the caller's rounding
#--precision/mode so any exception comes from that final instruction.
8051TANHSM:
8052	fmov.l		%d0,%fpcr		# restore users round prec,mode
8053	mov.b		&FMOV_OP,%d1		# last inst is MOVE
8054	fmov.x		X(%a6),%fp0		# last inst - possible exception set
8055	bra		t_catch
8056
8057#---RETURN SGN(X) - SGN(X)EPS
#--EPS is 2^(-126), the smallest normalized single (exponent field 0x00800000).
8058TANHHUGE:
8059	mov.l		X(%a6),%d1
8060	and.l		&0x80000000,%d1		# sign bit of X
8061	or.l		&0x3F800000,%d1		# SIGN(X)*1.0 in sgl fmt
8062	fmov.s		%d1,%fp0
8063	and.l		&0x80000000,%d1		# back to the bare sign bit
8064	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS
8065
8066	fmov.l		%d0,%fpcr		# restore users round prec,mode
8067	fadd.s		%d1,%fp0		# SIGN(X) - SIGN(X)*EPS
8068	bra		t_inx2
8069
8070	global		stanhd
8071#--TANH(X) = X FOR DENORMALIZED X
#--Delegates entirely to t_extdnrm, which is defined outside this section.
8072stanhd:
8073	bra		t_extdnrm
8074
8075#########################################################################
8076# slogn():    computes the natural logarithm of a normalized input	#
8077# slognd():   computes the natural logarithm of a denormalized input	#
8078# slognp1():  computes the log(1+X) of a normalized input		#
8079# slognp1d(): computes the log(1+X) of a denormalized input		#
8080#									#
8081# INPUT ***************************************************************	#
8082#	a0 = pointer to extended precision input			#
8083#	d0 = round precision,mode					#
8084#									#
8085# OUTPUT **************************************************************	#
8086#	fp0 = log(X) or log(1+X)					#
8087#									#
8088# ACCURACY and MONOTONICITY *******************************************	#
8089#	The returned result is within 2 ulps in 64 significant bits,	#
8090#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8091#	rounded to double precision. The result is provably monotonic	#
8092#	in double precision.						#
8093#									#
8094# ALGORITHM ***********************************************************	#
8095#	LOGN:								#
8096#	Step 1. If |X-1| < 1/16, approximate log(X) by an odd 		#
8097#		polynomial in u, where u = 2(X-1)/(X+1). Otherwise, 	#
8098#		move on to Step 2.					#
8099#									#
8100#	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first	#
8101#		seven significant bits of Y plus 2**(-7), i.e. 		#
8102#		F = 1.xxxxxx1 in base 2 where the six "x" match those 	#
8103#		of Y. Note that |Y-F| <= 2**(-7).			#
8104#									#
8105#	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a 		#
8106#		polynomial in u, log(1+u) = poly.			#
8107#									#
8108#	Step 4. Reconstruct 						#
8109#		log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)	#
8110#		by k*log(2) + (log(F) + poly). The values of log(F) are	#
8111#		calculated beforehand and stored in the program.	#
8112#									#
8113#	lognp1:								#
8114#	Step 1: If |X| < 1/16, approximate log(1+X) by an odd 		#
8115#		polynomial in u where u = 2X/(2+X). Otherwise, move on	#
8116#		to Step 2.						#
8117#									#
8118#	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done	#
8119#		in Step 2 of the algorithm for LOGN and compute 	#
8120#		log(1+X) as k*log(2) + log(F) + poly where poly 	#
8121#		approximates log(1+u), u = (Y-F)/F. 			#
8122#									#
8123#	Implementation Notes:						#
8124#	Note 1. There are 64 different possible values for F, thus 64 	#
8125#		log(F)'s need to be tabulated. Moreover, the values of	#
8126#		1/F are also tabulated so that the division in (Y-F)/F	#
8127#		can be performed by a multiplication.			#
8128#									#
8129#	Note 2. In Step 2 of lognp1, in order to preserve accuracy, 	#
8130#		the value Y-F has to be calculated carefully when 	#
8131#		1/2 <= X < 3/2. 					#
8132#									#
8133#	Note 3. To fully exploit the pipeline, polynomials are usually 	#
8134#		separated into two parts evaluated independently before	#
8135#		being added up.						#
8136#									#
8137#########################################################################
#--Constants for slogn / slognp1.
#--LOGOF2: log(2) in extended precision; multiplied by K in LP1CONT1.
8138LOGOF2:
8139	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8140
#--Single-precision 1.0, 0.0, +infinity and -1.0.
8141one:
8142	long		0x3F800000
8143zero:
8144	long		0x00000000
8145infty:
8146	long		0x7F800000
8147negone:
8148	long		0xBF800000
8149
#--LOGA1..LOGA6: double-precision coefficients A1..A6 of the log(1+U)
#--polynomial evaluated in LP1CONT1 (table-driven path).
8150LOGA6:
8151	long		0x3FC2499A,0xB5E4040B
8152LOGA5:
8153	long		0xBFC555B5,0x848CB7DB
8154
8155LOGA4:
8156	long		0x3FC99999,0x987D8730
8157LOGA3:
8158	long		0xBFCFFFFF,0xFF6F7E97
8159
8160LOGA2:
8161	long		0x3FD55555,0x555555A4
8162LOGA1:
8163	long		0xBFE00000,0x00000008
8164
#--LOGB1..LOGB5: double-precision coefficients B1..B5 of the odd
#--polynomial evaluated in LP1CONT2 (argument near 1).
8165LOGB5:
8166	long		0x3F175496,0xADD7DAD6
8167LOGB4:
8168	long		0x3F3C71C2,0xFE80C7E0
8169
8170LOGB3:
8171	long		0x3F624924,0x928BCCFF
8172LOGB2:
8173	long		0x3F899999,0x999995EC
8174
8175LOGB1:
8176	long		0x3FB55555,0x55555555
#--TWO: 2.0. KISNEG1 loads it with fmov.s, i.e. only the first longword
#--(0x40000000, which is 2.0 in single format) is read there.
8177TWO:
8178	long		0x40000000,0x00000000
8179
#--LTHOLD: 2^(-102) in extended precision (biased exponent 0x3F99).
#--slognp1 returns its argument unchanged when |X| is not above this.
8180LTHOLD:
8181	long		0x3f990000,0x80000000,0x00000000,0x00000000
8182
#--LOGTBL: 64 pairs of extended-precision values, one pair per F
#--(F = 1.xxxxxx1 in binary, see the algorithm header). Each value takes
#--16 bytes (three longwords plus one pad longword), so a pair is 32 bytes:
#--	offset  0: 1/F
#--	offset 16: log(F)
#--LOGMAIN / LP1CARE index the table with the six "x" bits of F times 32;
#--LP1CONT1 reads 1/F at (%a0) and log(F) after adding 16 to a0.
8183LOGTBL:
8184	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8185	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8186	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8187	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8188	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8189	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8190	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8191	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8192	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8193	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8194	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8195	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8196	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8197	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8198	long		0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8199	long		0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8200	long		0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8201	long		0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8202	long		0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8203	long		0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8204	long		0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8205	long		0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8206	long		0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8207	long		0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8208	long		0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8209	long		0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8210	long		0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8211	long		0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8212	long		0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8213	long		0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8214	long		0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8215	long		0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8216	long		0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8217	long		0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8218	long		0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8219	long		0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8220	long		0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8221	long		0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8222	long		0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8223	long		0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8224	long		0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8225	long		0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8226	long		0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8227	long		0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8228	long		0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8229	long		0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8230	long		0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8231	long		0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8232	long		0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8233	long		0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8234	long		0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8235	long		0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8236	long		0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8237	long		0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8238	long		0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8239	long		0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8240	long		0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8241	long		0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8242	long		0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8243	long		0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8244	long		0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8245	long		0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8246	long		0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8247	long		0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8248	long		0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8249	long		0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8250	long		0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8251	long		0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8252	long		0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8253	long		0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8254	long		0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8255	long		0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8256	long		0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8257	long		0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8258	long		0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8259	long		0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8260	long		0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8261	long		0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8262	long		0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8263	long		0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8264	long		0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8265	long		0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8266	long		0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8267	long		0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8268	long		0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8269	long		0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8270	long		0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8271	long		0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8272	long		0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8273	long		0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8274	long		0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8275	long		0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8276	long		0x3FFE0000,0x94458094,0x45809446,0x00000000
8277	long		0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8278	long		0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8279	long		0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8280	long		0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8281	long		0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8282	long		0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8283	long		0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8284	long		0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8285	long		0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8286	long		0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8287	long		0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8288	long		0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8289	long		0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8290	long		0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8291	long		0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8292	long		0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8293	long		0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8294	long		0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8295	long		0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8296	long		0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8297	long		0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8298	long		0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8299	long		0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8300	long		0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8301	long		0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8302	long		0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8303	long		0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8304	long		0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8305	long		0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8306	long		0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8307	long		0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8308	long		0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8309	long		0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8310	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
8311	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8312
#--Scratch-frame aliases for the log routines. X, KLOG2 and SAVEU all name
#--the same slot (FP_SCR0) and are used at different stages; F lives in
#--FP_SCR1 and ADJK in L_SCR1.
8313	set		ADJK,L_SCR1
8314
8315	set		X,FP_SCR0
8316	set		XDCARE,X+2
8317	set		XFRAC,X+4
8318
8319	set		F,FP_SCR1
8320	set		FFRAC,F+4
8321
8322	set		KLOG2,FP_SCR0
8323
8324	set		SAVEU,FP_SCR0
8325
8326	global		slogn
8327#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--a0 -> extended-precision X, d0 = caller's rounding precision/mode (see
#--the routine header). Result is produced in fp0.
8328slogn:
8329	fmov.x		(%a0),%fp0		# LOAD INPUT
8330	mov.l		&0x00000000,ADJK(%a6)	# normalized input: no exponent adjustment
8331
#--Shared entry: slognd branches here after normalizing a denormalized
#--input, with a0 -> the normalized copy and ADJK holding the adjustment.
8332LOGBGN:
8333#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8334#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8335
8336	mov.l		(%a0),%d1
8337	mov.w		4(%a0),%d1		# d1 = sign/exponent : top 16 mantissa bits
8338
8339	mov.l		(%a0),X(%a6)		# copy the operand into the scratch slot X
8340	mov.l		4(%a0),X+4(%a6)
8341	mov.l		8(%a0),X+8(%a6)
8342
8343	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
8344	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
8345# X IS POSITIVE, CHECK IF X IS NEAR 1
#--NOTE(review): the two compact constants below evaluate to about 0.9394
#--and 1.0645, i.e. closer to exp(-1/16) and exp(1/16) than to 15/16, 17/16.
8346	cmp.l		%d1,&0x3ffef07d 	# IS X < 15/16?
8347	blt.b		LOGMAIN			# YES
8348	cmp.l		%d1,&0x3fff8841 	# IS X > 17/16?
8349	ble.w		LOGNEAR1		# NO
8350
#--Entered by fall-through from LOGBGN and by branches from LP1REAL.
#--On entry d1 = compact form of the (positive) argument and X(%a6) holds
#--its mantissa longwords. Falls through into LP1CONT1.
8351LOGMAIN:
8352#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8353
8354#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8355#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8356#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8357#--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8358#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8359#--LOG(1+U) CAN BE VERY EFFICIENT.
8360#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8361#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8362
8363#--GET K, Y, F, AND ADDRESS OF 1/F.
8364	asr.l		&8,%d1
8365	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
8366	sub.l		&0x3FFF,%d1		# THIS IS K
8367	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8368	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
8369	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT
8370
8371#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8372	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
8373	mov.l		XFRAC(%a6),FFRAC(%a6)
8374	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
8375	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
8376	mov.l		FFRAC(%a6),%d1	# READY TO GET ADDRESS OF 1/F
8377	and.l		&0x7E000000,%d1		# the six "x" bits of F select the table pair
8378	asr.l		&8,%d1
8379	asr.l		&8,%d1
8380	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT (index * 32)
8381	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F
8382
8383	fmov.x		X(%a6),%fp0		# fp0 = Y
8384	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0 (value in [1,2))
8385	clr.l		F+8(%a6)		# low mantissa longword of F is zero
8386	fsub.x		F(%a6),%fp0		# Y-F
8387	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
8388#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8389#--REGISTERS SAVED: FP2-FP3 (ON THE STACK)
8390
#--Table-driven tail shared by LOGMAIN (fall-through) and LP1CARE.
#--On entry: fp0 = Y-F, fp1 = K, a0 -> 1/F entry of LOGTBL, fp2-fp3 saved
#--on the stack, d0 = caller's rounding precision/mode.
8391LP1CONT1:
8392#--AN RE-ENTRY POINT FOR LOGNP1
8393	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
8394	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
8395	fmov.x		%fp0,%fp2
8396	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
8397	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1
8398
8399#--LOG(1+U) IS APPROXIMATED BY
8400#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8401#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
8402
8403	fmov.x		%fp2,%fp3		# fp3 keeps V for the later multiplies
8404	fmov.x		%fp2,%fp1
8405
8406	fmul.d		LOGA6(%pc),%fp1		# V*A6
8407	fmul.d		LOGA5(%pc),%fp2		# V*A5
8408
8409	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
8410	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5
8411
8412	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
8413	fmul.x		%fp3,%fp2		# V*(A3+V*A5)
8414
8415	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
8416	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)
8417
8418	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
8419	add.l		&16,%a0			# ADDRESS OF LOG(F)
8420	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))
8421
8422	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
8423	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))
8424
8425	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
8426	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
8427	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)
8428
8429	fmov.l		%d0,%fpcr		# final add runs under caller's rnd prec,mode
8430	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
8431	bra		t_inx2
8432
8433
#--Argument close to 1: set up FP1 = 2(X-1) and FP0 = X+1, then fall
#--through into LP1CONT2, which forms U = FP1/FP0.
8434LOGNEAR1:
8435
8436# if the input is exactly equal to one, then exit through ld_pzero.
8437# if these 2 lines weren't here, the correct answer would be returned
8438# but the INEX2 bit would be set.
8439	fcmp.b		%fp0,&0x1		# is it equal to one?
8440	fbeq.l		ld_pzero		# yes
8441
8442#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8443	fmov.x		%fp0,%fp1
8444	fsub.s		one(%pc),%fp1		# FP1 IS X-1
8445	fadd.s		one(%pc),%fp0		# FP0 IS X+1
8446	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
8447#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8448#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8449
#--Near-1 tail shared by LOGNEAR1 (fall-through) and LP1ONE16.
#--On entry: fp1 = numerator, fp0 = denominator of U, d0 = caller's
#--rounding precision/mode. U is kept in the SAVEU scratch slot.
8450LP1CONT2:
8451#--THIS IS AN RE-ENTRY POINT FOR LOGNP1
8452	fdiv.x		%fp0,%fp1		# FP1 IS U
8453	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
8454#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8455#--LET V=U*U, W=V*V, CALCULATE
8456#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8457#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
8458	fmov.x		%fp1,%fp0
8459	fmul.x		%fp0,%fp0		# FP0 IS V
8460	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
8461	fmov.x		%fp0,%fp1
8462	fmul.x		%fp1,%fp1		# FP1 IS W
8463
8464	fmov.d		LOGB5(%pc),%fp3		# B5
8465	fmov.d		LOGB4(%pc),%fp2		# B4
8466
8467	fmul.x		%fp1,%fp3		# W*B5
8468	fmul.x		%fp1,%fp2		# W*B4
8469
8470	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
8471	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4
8472
8473	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED
8474
8475	fmul.x		%fp0,%fp2		# V*(B2+W*B4)
8476
8477	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
8478	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V
8479
8480	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8481	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED
8482
8483	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8484
8485	fmov.l		%d0,%fpcr		# final add runs under caller's rnd prec,mode
8486	fadd.x		SAVEU(%a6),%fp0		# U + U*V*(...)
8487	bra		t_inx2
8488
8489#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
#--Reached from LOGBGN when the compact operand is negative; hands off to
#--t_operr, which is defined outside this section.
8490LOGNEG:
8491	bra		t_operr
8492
8493	global		slognd
#--slognd: a0 -> denormalized extended-precision X. The mantissa is shifted
#--left by k bits until its top bit is set, the normalized copy is written
#--to the X scratch slot, ADJK is set to -k, and control passes to LOGBGN
#--with a0 -> X(%a6).
8494slognd:
8495#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8496
8497	mov.l		&-100,ADJK(%a6)		# placeholder; overwritten with -k on both paths below
8498
8499#----normalize the input value by left shifting k bits (k to be determined
8500#----below), adjusting exponent and storing -k to  ADJK
8501#----the value TWOTO100 is no longer needed.
8502#----Note that this code assumes the denormalized input is NON-ZERO.
8503
8504	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
8505	mov.l		(%a0),%d3		# D3 = sign/exponent longword of the input
8506	mov.l		4(%a0),%d4
8507	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
8508	clr.l		%d2			# D2 used for holding K
8509
8510	tst.l		%d4
8511	bne.b		Hi_not0
8512
#--High mantissa longword is zero: the low longword becomes the high one
#--(a shift by 32) and is then normalized on its own.
8513Hi_0:
8514	mov.l		%d5,%d4
8515	clr.l		%d5
8516	mov.l		&32,%d2			# k starts at 32
8517	clr.l		%d6
8518	bfffo		%d4{&0:&32},%d6		# d6 = offset of the first 1 bit
8519	lsl.l		%d6,%d4
8520	add.l		%d6,%d2			# (D3,D4,D5) is normalized
8521
8522	mov.l		%d3,X(%a6)
8523	mov.l		%d4,XFRAC(%a6)
8524	mov.l		%d5,XFRAC+4(%a6)
8525	neg.l		%d2
8526	mov.l		%d2,ADJK(%a6)		# ADJK = -k
8527	fmov.x		X(%a6),%fp0
8528	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8529	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
8530	bra.w		LOGBGN			# begin regular log(X)
8531
#--High mantissa longword is non-zero: shift the 64-bit mantissa (D4,D5)
#--left by k, carrying the top k bits of D5 into D4.
8532Hi_not0:
8533	clr.l		%d6
8534	bfffo		%d4{&0:&32},%d6		# find first 1
8535	mov.l		%d6,%d2			# get k
8536	lsl.l		%d6,%d4
8537	mov.l		%d5,%d7			# a copy of D5
8538	lsl.l		%d6,%d5
8539	neg.l		%d6
8540	add.l		&32,%d6			# d6 = 32 - k
8541	lsr.l		%d6,%d7			# top k bits of the old D5
8542	or.l		%d7,%d4			# (D3,D4,D5) normalized
8543
8544	mov.l		%d3,X(%a6)
8545	mov.l		%d4,XFRAC(%a6)
8546	mov.l		%d5,XFRAC+4(%a6)
8547	neg.l		%d2
8548	mov.l		%d2,ADJK(%a6)		# ADJK = -k
8549	fmov.x		X(%a6),%fp0
8550	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
8551	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
8552	bra.w		LOGBGN			# begin regular log(X)
8553
8554	global		slognp1
8555#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--a0 -> extended-precision X, d0 = caller's rounding precision/mode (see
#--the routine header). When |X| is not above LTHOLD the argument itself is
#--returned, moved under the caller's rounding precision/mode.
8556slognp1:
8557	fmov.x		(%a0),%fp0		# LOAD INPUT
8558	fabs.x		%fp0			# test magnitude
8559	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
8560	fbgt.w		LP1REAL			# if greater, continue
8561	fmov.l		%d0,%fpcr		# install caller's rnd prec,mode
8562	mov.b		&FMOV_OP,%d1		# last inst is MOVE
8563	fmov.x		(%a0),%fp0		# return signed argument
8564	bra		t_catch
8565
#--General log(1+Z) path: form X = ROUND(1+Z), build its compact form in
#--d1 and dispatch on its range. Falls through to LP1NEAR1 when X lies in
#--[1/2,3/2].
8566LP1REAL:
8567	fmov.x		(%a0),%fp0		# LOAD INPUT
8568	mov.l		&0x00000000,ADJK(%a6)	# no exponent adjustment on this path
8569	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
8570	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
8571	fmov.x		%fp0,X(%a6)
8572	mov.w		XFRAC(%a6),XDCARE(%a6)	# top 16 mantissa bits into the word after the exponent
8573	mov.l		X(%a6),%d1		# d1 = compact form of X
8574	cmp.l		%d1,&0
8575	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
8576	cmp.l		%d1,&0x3ffe8000 	# IS X OUTSIDE [1/2,3/2]?
8577	blt.w		LOGMAIN			# X < 1/2: plain log(X)
8578	cmp.l		%d1,&0x3fffc000
8579	bgt.w		LOGMAIN			# X > 3/2: plain log(X)
8580#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8581#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8582#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8583
#--X = ROUND(1+Z) lies in [1/2,3/2]; d1 = compact X, fp0 = X, fp1 = Z.
8584LP1NEAR1:
8585#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8586	cmp.l		%d1,&0x3ffef07d
8587	blt.w		LP1CARE			# below the window: table-driven path
8588	cmp.l		%d1,&0x3fff8841
8589	bgt.w		LP1CARE			# above the window: table-driven path
8590
8591LP1ONE16:
8592#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8593#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8594	fadd.x		%fp1,%fp1		# FP1 IS 2Z
8595	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
8596#--U = FP1/FP0
8597	bra.w		LP1CONT2
8598
#--On entry: d1 = compact X (X = ROUND(1+Z)), X(%a6) holds X, fp1 = Z.
8599LP1CARE:
8600#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8601#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8602#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8603#--THERE ARE ONLY TWO CASES.
8604#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8605#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
8606#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8607#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8608
8609	mov.l		XFRAC(%a6),FFRAC(%a6)
8610	and.l		&0xFE000000,FFRAC(%a6)	# first 7 mantissa bits
8611	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
8612	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
8613	bge.b		KISZERO
8614
#--Case 1 (1+Z < 1): K = -1, Y-F = (2-F) + 2Z.
8615KISNEG1:
8616	fmov.s		TWO(%pc),%fp0		# fp0 = 2.0 (first longword of TWO read as single)
8617	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0 (value in [1,2))
8618	clr.l		F+8(%a6)		# low mantissa longword of F is zero
8619	fsub.x		F(%a6),%fp0		# 2-F
8620	mov.l		FFRAC(%a6),%d1
8621	and.l		&0x7E000000,%d1		# the six "x" bits of F select the table pair
8622	asr.l		&8,%d1
8623	asr.l		&8,%d1
8624	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
8625	fadd.x		%fp1,%fp1		# GET 2Z
8626	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
8627	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
8628	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
8629	add.l		%d1,%a0
8630	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
8631	bra.w		LP1CONT1
8632
#--Case 2 (1+Z >= 1): K = 0, Y-F = (1-F) + Z.
8633KISZERO:
8634	fmov.s		one(%pc),%fp0		# fp0 = 1.0
8635	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0 (value in [1,2))
8636	clr.l		F+8(%a6)		# low mantissa longword of F is zero
8637	fsub.x		F(%a6),%fp0		# 1-F
8638	mov.l		FFRAC(%a6),%d1
8639	and.l		&0x7E000000,%d1		# the six "x" bits of F select the table pair
8640	asr.l		&8,%d1
8641	asr.l		&8,%d1
8642	asr.l		&4,%d1			# d1 = displacement of the 1/F entry
8643	fadd.x		%fp1,%fp0		# FP0 IS Y-F
8644	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
8645	lea		LOGTBL(%pc),%a0
8646	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
8647	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
8648	bra.w		LP1CONT1
8649
#--Reached from LP1REAL when the compact form of X = ROUND(1+Z) is <= 0.
8650LP1NEG0:
8651#--FPCR SAVED. D1 IS X IN COMPACT FORM.
8652	cmp.l		%d1,&0
8653	blt.b		LP1NEG			# 1+Z negative
#--1+Z is zero: load -1.0 and hand off to t_dz (defined outside this
#--section).
8654LP1ZERO:
8655	fmov.s		negone(%pc),%fp0
8656
8657	fmov.l		%d0,%fpcr		# install caller's rnd prec,mode
8658	bra		t_dz
8659
#--1+Z is negative: load 0.0 and hand off to t_operr (defined outside this
#--section).
8660LP1NEG:
8661	fmov.s		zero(%pc),%fp0
8662
8663	fmov.l		%d0,%fpcr		# install caller's rnd prec,mode
8664	bra		t_operr
8665
	global		slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm: no computation is done here, control goes
# straight to t_extdnrm (defined elsewhere in this file), which produces
# the result.
slognp1d:
	bra		t_extdnrm
8671
8672#########################################################################
8673# satanh():  computes the inverse hyperbolic tangent of a norm input	#
8674# satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
8675#									#
8676# INPUT ***************************************************************	#
8677#	a0 = pointer to extended precision input			#
8678#	d0 = round precision,mode					#
8679#									#
8680# OUTPUT **************************************************************	#
8681#	fp0 = arctanh(X)						#
8682#									#
8683# ACCURACY and MONOTONICITY *******************************************	#
8684#	The returned result is within 3 ulps in	64 significant bit,	#
8685#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8686#	rounded to double precision. The result is provably monotonic	#
8687#	in double precision.						#
8688#									#
8689# ALGORITHM ***********************************************************	#
8690#									#
8691#	ATANH								#
8692#	1. If |X| >= 1, go to 3.					#
8693#									#
8694#	2. (|X| < 1) Calculate atanh(X) by				#
8695#		sgn := sign(X)						#
8696#		y := |X|						#
8697#		z := 2y/(1-y)						#
8698#		atanh(X) := sgn * (1/2) * logp1(z)			#
8699#		Exit.							#
8700#									#
8701#	3. If |X| > 1, go to 5.						#
8702#									#
8703#	4. (|X| = 1) Generate infinity with an appropriate sign and	#
8704#		divide-by-zero by					#
8705#		sgn := sign(X)						#
#		atanh(X) := sgn / (+0).					#
8707#		Exit.							#
8708#									#
8709#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
8710#		Exit.							#
8711#									#
8712#########################################################################
8713
8714	global		satanh
8715satanh:
8716	mov.l		(%a0),%d1
8717	mov.w		4(%a0),%d1
8718	and.l		&0x7FFFFFFF,%d1
8719	cmp.l		%d1,&0x3FFF8000
8720	bge.b		ATANHBIG
8721
8722#--THIS IS THE USUAL CASE, |X| < 1
8723#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8724
8725	fabs.x		(%a0),%fp0		# Y = |X|
8726	fmov.x		%fp0,%fp1
8727	fneg.x		%fp1			# -Y
8728	fadd.x		%fp0,%fp0		# 2Y
8729	fadd.s		&0x3F800000,%fp1	# 1-Y
8730	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
8731	mov.l		(%a0),%d1
8732	and.l		&0x80000000,%d1
8733	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
8734	mov.l		%d1,-(%sp)
8735
8736	mov.l		%d0,-(%sp)		# save rnd prec,mode
8737	clr.l		%d0			# pass ext prec,RN
8738	fmovm.x		&0x01,-(%sp)		# save Z on stack
8739	lea		(%sp),%a0		# pass ptr to Z
8740	bsr		slognp1			# LOG1P(Z)
8741	add.l		&0xc,%sp		# clear Z from stack
8742
8743	mov.l		(%sp)+,%d0		# fetch old prec,mode
8744	fmov.l		%d0,%fpcr		# load it
8745	mov.b		&FMUL_OP,%d1		# last inst is MUL
8746	fmul.s		(%sp)+,%fp0
8747	bra		t_catch
8748
8749ATANHBIG:
8750	fabs.x		(%a0),%fp0		# |X|
8751	fcmp.s		%fp0,&0x3F800000
8752	fbgt		t_operr
8753	bra		t_dz
8754
	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
#--no computation is done here; the result is produced by t_extdnrm
#--(defined elsewhere in this file).
satanhd:
	bra		t_extdnrm
8759
8760#########################################################################
8761# slog10():  computes the base-10 logarithm of a normalized input	#
8762# slog10d(): computes the base-10 logarithm of a denormalized input	#
8763# slog2():   computes the base-2 logarithm of a normalized input	#
8764# slog2d():  computes the base-2 logarithm of a denormalized input	#
8765#									#
8766# INPUT *************************************************************** #
8767#	a0 = pointer to extended precision input			#
8768#	d0 = round precision,mode					#
8769#									#
8770# OUTPUT **************************************************************	#
8771#	fp0 = log_10(X) or log_2(X)					#
8772#									#
8773# ACCURACY and MONOTONICITY *******************************************	#
8774#	The returned result is within 1.7 ulps in 64 significant bit,	#
8775#	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
8776#	rounded to double precision. The result is provably monotonic	#
8777#	in double precision.						#
8778#									#
8779# ALGORITHM ***********************************************************	#
8780#									#
8781#       slog10d:							#
8782#									#
8783#       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
8784#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8785#       Notes:  Default means round-to-nearest mode, no floating-point	#
8786#               traps, and precision control = double extended.		#
8787#									#
8788#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8789#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8790#									#
8791#       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
8792#            2.1 Restore the user FPCR					#
8793#            2.2 Return ans := Y * INV_L10.				#
8794#									#
8795#       slog10: 							#
8796#									#
8797#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8798#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8799#       Notes:  Default means round-to-nearest mode, no floating-point	#
8800#               traps, and precision control = double extended.		#
8801#									#
8802#       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
8803#									#
8804#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
8805#            2.1  Restore the user FPCR					#
8806#            2.2  Return ans := Y * INV_L10.				#
8807#									#
8808#       sLog2d:								#
8809#									#
8810#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8811#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8812#       Notes:  Default means round-to-nearest mode, no floating-point	#
8813#               traps, and precision control = double extended.		#
8814#									#
8815#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
8816#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
8817#									#
#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
8819#            2.1  Restore the user FPCR					#
8820#            2.2  Return ans := Y * INV_L2.				#
8821#									#
8822#       sLog2:								#
8823#									#
8824#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
8825#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
8826#       Notes:  Default means round-to-nearest mode, no floating-point	#
8827#               traps, and precision control = double extended.		#
8828#									#
8829#       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
8830#               go to Step 3.						#
8831#									#
8832#       Step 2.   Return k.						#
8833#            2.1  Get integer k, X = 2^k.				#
8834#            2.2  Restore the user FPCR.				#
8835#            2.3  Return ans := convert-to-double-extended(k).		#
8836#									#
8837#       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
8838#									#
8839#       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
8840#            4.1  Restore the user FPCR					#
8841#            4.2  Return ans := Y * INV_L2.				#
8842#									#
8843#########################################################################
8844
8845INV_L10:
8846	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8847
8848INV_L2:
8849	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8850
	global		slog10
#--entry point for Log10(X), X is normalized
#--	a0 = pointer to X, d0 = caller's round precision,mode
#--	X == 1 : leaves through ld_pzero
#--	X <  0 : leaves through invalid (t_operr)
#--	else   : fp0 = slogn(X) * INV_L10, leaves through t_inx2
slog10:
	fmov.b		&0x1,%fp0		# fp0 = 1
	fcmp.x		%fp0,(%a0)		# if operand == 1,
	fbeq.l		ld_pzero		# return an EXACT zero

	mov.l		(%a0),%d1		# sign/exponent longword of X
	blt.w		invalid			# sign bit set -> X < 0
	mov.l		%d0,-(%sp)		# save caller's rnd prec,mode
	clr.l		%d0			# slogn is called with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# saved d0 goes straight into FPCR
	fmul.x		INV_L10(%pc),%fp0	# log(X) * (1/log(10))
	bra		t_inx2
8866
	global		slog10d
#--entry point for Log10(X), X is denormalized
#--	a0 = pointer to X, d0 = caller's round precision,mode
#--	X < 0 : leaves through invalid (t_operr)
#--	else  : fp0 = slognd(X) * INV_L10, leaves through t_minx2
slog10d:
	mov.l		(%a0),%d1		# sign/exponent longword of X
	blt.w		invalid			# sign bit set -> X < 0
	mov.l		%d0,-(%sp)		# save caller's rnd prec,mode
	clr.l		%d0			# slognd is called with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# saved d0 goes straight into FPCR
	fmul.x		INV_L10(%pc),%fp0	# log(X) * (1/log(10))
	bra		t_minx2
8878
	global		slog2
#--entry point for Log2(X), X is normalized
#--	a0 = pointer to X, d0 = caller's round precision,mode
#--	X <  0   : leaves through invalid (t_operr)
#--	X == 1   : leaves through ld_pzero
#--	X == 2^k : fp0 = k (integer converted under the caller's FPCR)
#--	else     : fp0 = slogn(X) * INV_L2
slog2:
	mov.l		(%a0),%d1		# sign/exponent longword of X
	blt.w		invalid			# sign bit set -> X < 0

#--X is a power of two only if every mantissa bit below the leading
#--(explicit integer) bit is zero.
	mov.l		8(%a0),%d1		# low mantissa longword
	bne.b		continue		# X is not 2^k

	mov.l		4(%a0),%d1		# high mantissa longword
	and.l		&0x7FFFFFFF,%d1		# ignore the leading bit
	bne.b		continue

#--X = 2^k.
	mov.w		(%a0),%d1		# d1 upper word is 0 here, so d1 = exponent word
	and.l		&0x00007FFF,%d1		# biased exponent only
	sub.l		&0x3FFF,%d1		# remove the bias: d1 = k
	beq.l		ld_pzero		# k == 0, i.e. X == 1
	fmov.l		%d0,%fpcr		# caller's rnd prec,mode
	fmov.l		%d1,%fp0		# fp0 = k
	bra		t_inx2

#--general case: log_2(X) = log(X) * (1/log(2))
continue:
	mov.l		%d0,-(%sp)		# save caller's rnd prec,mode
	clr.l		%d0			# slogn is called with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# saved d0 goes straight into FPCR
	fmul.x		INV_L2(%pc),%fp0	# log(X) * (1/log(2))
	bra		t_inx2

#--common negative-operand exit, shared by slog10, slog10d, slog2 and slog2d
invalid:
	bra		t_operr
8911
	global		slog2d
#--entry point for Log2(X), X is denormalized
#--	a0 = pointer to X, d0 = caller's round precision,mode
#--	X < 0 : leaves through invalid (t_operr)
#--	else  : fp0 = slognd(X) * INV_L2, leaves through t_minx2
slog2d:
	mov.l		(%a0),%d1		# sign/exponent longword of X
	blt.w		invalid			# sign bit set -> X < 0
	mov.l		%d0,-(%sp)		# save caller's rnd prec,mode
	clr.l		%d0			# slognd is called with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# saved d0 goes straight into FPCR
	fmul.x		INV_L2(%pc),%fp0	# log(X) * (1/log(2))
	bra		t_minx2
8923
8924#########################################################################
8925# stwotox():  computes 2**X for a normalized input			#
8926# stwotoxd(): computes 2**X for a denormalized input			#
8927# stentox():  computes 10**X for a normalized input			#
8928# stentoxd(): computes 10**X for a denormalized input			#
8929#									#
8930# INPUT ***************************************************************	#
8931#	a0 = pointer to extended precision input			#
8932#	d0 = round precision,mode					#
8933#									#
8934# OUTPUT **************************************************************	#
8935#	fp0 = 2**X or 10**X						#
8936#									#
8937# ACCURACY and MONOTONICITY *******************************************	#
8938#	The returned result is within 2 ulps in 64 significant bit, 	#
8939#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
8940#	rounded to double precision. The result is provably monotonic	#
8941#	in double precision.						#
8942#									#
8943# ALGORITHM ***********************************************************	#
8944#									#
8945#	twotox								#
8946#	1. If |X| > 16480, go to ExpBig.				#
8947#									#
8948#	2. If |X| < 2**(-70), go to ExpSm.				#
8949#									#
8950#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
8951#		decompose N as						#
8952#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8953#									#
8954#	4. Overwrite r := r * log2. Then				#
8955#		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8956#		Go to expr to compute that expression.			#
8957#									#
8958#	tentox								#
8959#	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.	#
8960#									#
8961#	2. If |X| < 2**(-70), go to ExpSm.				#
8962#									#
8963#	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set		#
8964#		N := round-to-int(y). Decompose N as			#
8965#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
8966#									#
8967#	4. Define r as							#
8968#		r := ((X - N*L1)-N*L2) * L10				#
8969#		where L1, L2 are the leading and trailing parts of 	#
8970#		log_10(2)/64 and L10 is the natural log of 10. Then	#
8971#		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
8972#		Go to expr to compute that expression.			#
8973#									#
8974#	expr								#
8975#	1. Fetch 2**(j/64) from table as Fact1 and Fact2.		#
8976#									#
8977#	2. Overwrite Fact1 and Fact2 by					#
8978#		Fact1 := 2**(M) * Fact1					#
8979#		Fact2 := 2**(M) * Fact2					#
8980#		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).		#
8981#									#
8982#	3. Calculate P where 1 + P approximates exp(r):			#
8983#		P = r + r*r*(A1+r*(A2+...+r*A5)).			#
8984#									#
8985#	4. Let AdjFact := 2**(M'). Return				#
8986#		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).		#
8987#		Exit.							#
8988#									#
8989#	ExpBig								#
8990#	1. Generate overflow by Huge * Huge if X > 0; otherwise, 	#
8991#	        generate underflow by Tiny * Tiny.			#
8992#									#
8993#	ExpSm								#
8994#	1. Return 1 + X.						#
8995#									#
8996#########################################################################
8997
8998L2TEN64:
8999	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
9000L10TWO1:
9001	long		0x3F734413,0x509F8000	# LOG2/64LOG10
9002
9003L10TWO2:
9004	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
9005
9006LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
9007
9008LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9009
9010EXPA5:	long		0x3F56C16D,0x6F7BD0B2
9011EXPA4:	long		0x3F811112,0x302C712C
9012EXPA3:	long		0x3FA55555,0x55554CC1
9013EXPA2:	long		0x3FC55555,0x55554A54
9014EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000
9015
9016TEXPTBL:
9017	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
9018	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9019	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9020	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9021	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9022	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9023	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9024	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9025	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9026	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9027	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9028	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9029	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9030	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9031	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9032	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9033	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9034	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9035	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9036	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9037	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9038	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9039	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9040	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9041	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9042	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9043	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9044	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9045	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9046	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9047	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9048	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9049	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9050	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9051	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9052	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9053	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9054	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9055	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9056	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9057	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9058	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9059	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9060	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9061	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9062	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9063	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9064	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9065	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9066	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9067	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9068	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9069	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9070	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9071	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9072	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9073	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9074	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9075	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9076	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9077	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9078	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9079	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9080	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9081
9082	set		INT,L_SCR1
9083
9084	set		X,FP_SCR0
9085	set		XDCARE,X+2
9086	set		XFRAC,X+4
9087
9088	set		ADJFACT,FP_SCR0
9089
9090	set		FACT1,FP_SCR0
9091	set		FACT1HI,FACT1+4
9092	set		FACT1LOW,FACT1+8
9093
9094	set		FACT2,FP_SCR1
9095	set		FACT2HI,FACT2+4
9096	set		FACT2LOW,FACT2+8
9097
9098	global		stwotox
9099#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9100stwotox:
9101	fmovm.x		(%a0),&0x80		# LOAD INPUT
9102
9103	mov.l		(%a0),%d1
9104	mov.w		4(%a0),%d1
9105	fmov.x		%fp0,X(%a6)
9106	and.l		&0x7FFFFFFF,%d1
9107
9108	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9109	bge.b		TWOOK1
9110	bra.w		EXPBORS
9111
9112TWOOK1:
9113	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
9114	ble.b		TWOMAIN
9115	bra.w		EXPBORS
9116
9117TWOMAIN:
9118#--USUAL CASE, 2^(-70) <= |X| <= 16480
9119
9120	fmov.x		%fp0,%fp1
9121	fmul.s		&0x42800000,%fp1	# 64 * X
9122	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
9123	mov.l		%d2,-(%sp)
9124	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9125	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9126	mov.l		INT(%a6),%d1
9127	mov.l		%d1,%d2
9128	and.l		&0x3F,%d1		# D0 IS J
9129	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
9130	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9131	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9132	mov.l		%d2,%d1
9133	asr.l		&1,%d1			# D0 IS M
9134	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9135	add.l		&0x3FFF,%d2
9136
9137#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9138#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9139#--ADJFACT = 2^(M').
9140#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9141
9142	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9143
9144	fmul.s		&0x3C800000,%fp1	# (1/64)*N
9145	mov.l		(%a1)+,FACT1(%a6)
9146	mov.l		(%a1)+,FACT1HI(%a6)
9147	mov.l		(%a1)+,FACT1LOW(%a6)
9148	mov.w		(%a1)+,FACT2(%a6)
9149
9150	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)
9151
9152	mov.w		(%a1)+,FACT2HI(%a6)
9153	clr.w		FACT2HI+2(%a6)
9154	clr.l		FACT2LOW(%a6)
9155	add.w		%d1,FACT1(%a6)
9156	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
9157	add.w		%d1,FACT2(%a6)
9158
9159	bra.w		expr
9160
EXPBORS:
#--OUT-OF-RANGE ARGUMENTS FROM stwotox AND stentox LAND HERE.
#--d1 = COMPACT FORM OF |X|, d0 = CALLER'S ROUND PRECISION,MODE, fp0 = X,
#--AND X(%a6) HOLDS A COPY OF X.
	cmp.l		%d1,&0x3FFF8000		# |X| above 1 ?
	bgt.b		TEXPBIG			# yes; the argument was too large

#--|X| IS SMALL, RETURN 1 + X

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		&0x3F800000,%fp0	# fp0 = 1 + X
	bra		t_pinx2

TEXPBIG:
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
	mov.l		X(%a6),%d1		# N flag now mirrors the sign of X
	bmi.b		EXPNEG			# X < 0

	bra		t_ovfl2			# t_ovfl expects positive value

EXPNEG:
	bra		t_unfl2			# t_unfl expects positive value
9183
9184	global		stwotoxd
9185stwotoxd:
9186#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9187
9188	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9189	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9190	mov.l		(%a0),%d1
9191	or.l		&0x00800001,%d1
9192	fadd.s		%d1,%fp0
9193	bra		t_pinx2
9194
9195	global		stentox
9196#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9197stentox:
9198	fmovm.x		(%a0),&0x80		# LOAD INPUT
9199
9200	mov.l		(%a0),%d1
9201	mov.w		4(%a0),%d1
9202	fmov.x		%fp0,X(%a6)
9203	and.l		&0x7FFFFFFF,%d1
9204
9205	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
9206	bge.b		TENOK1
9207	bra.w		EXPBORS
9208
9209TENOK1:
9210	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
9211	ble.b		TENMAIN
9212	bra.w		EXPBORS
9213
9214TENMAIN:
9215#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9216
9217	fmov.x		%fp0,%fp1
9218	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
9219	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
9220	mov.l		%d2,-(%sp)
9221	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
9222	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
9223	mov.l		INT(%a6),%d1
9224	mov.l		%d1,%d2
9225	and.l		&0x3F,%d1		# D0 IS J
9226	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
9227	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
9228	asr.l		&6,%d2			# d2 IS L, N = 64L + J
9229	mov.l		%d2,%d1
9230	asr.l		&1,%d1			# D0 IS M
9231	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
9232	add.l		&0x3FFF,%d2
9233
9234#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9235#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9236#--ADJFACT = 2^(M').
9237#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9238	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
9239
9240	fmov.x		%fp1,%fp2
9241
9242	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
9243	mov.l		(%a1)+,FACT1(%a6)
9244
9245	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL
9246
9247	mov.l		(%a1)+,FACT1HI(%a6)
9248	mov.l		(%a1)+,FACT1LOW(%a6)
9249	fsub.x		%fp1,%fp0		# X - N L_LEAD
9250	mov.w		(%a1)+,FACT2(%a6)
9251
9252	fsub.x		%fp2,%fp0		# X - N L_TRAIL
9253
9254	mov.w		(%a1)+,FACT2HI(%a6)
9255	clr.w		FACT2HI+2(%a6)
9256	clr.l		FACT2LOW(%a6)
9257
9258	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
9259	add.w		%d1,FACT1(%a6)
9260	add.w		%d1,FACT2(%a6)
9261
expr:
#--SHARED TAIL OF stwotox AND stentox.
#--ON ENTRY: FP0 IS R, FACT1 + FACT2 = 2**(M) * 2**(J/64), d2 IS THE BIASED
#--EXPONENT OF ADJFACT = 2**(M'), d0 IS THE USER'S ROUND PRECISION,MODE, AND
#--THE STACK HOLDS (TOP FIRST) THE SAVED fp2/fp3 AND THE CALLER'S d2.
#--THE FOLLOWING CODE COMPUTES
#--	2**(M'+M) * 2**(J/64) * EXP(R)

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS S = R*R

#--the polynomial is split into two chains (fp2 and fp3), both in powers of S
	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4

	fmul.x		%fp1,%fp2		# FP2 IS S*A5
	fmul.x		%fp1,%fp3		# FP3 IS S*A4

	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)

	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

#--FINAL RECONSTRUCTION PROCESS
#--	FP0 := FACT1 + ((FACT1 * (EXP(R)-1)) + FACT2), THEN FP0 := ADJFACT * FP0

	fmul.x		FACT1(%a6),%fp0
	fadd.x		FACT2(%a6),%fp0
	fadd.x		FACT1(%a6),%fp0

#--ADJFACT shares FP_SCR0 with FACT1, so it is only built now that FACT1
#--has been used for the last time.
	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
	mov.l		(%sp)+,%d2		# restore the caller's d2
	mov.l		&0x80000000,ADJFACT+4(%a6) # mantissa = 1.0 (leading bit only)
	clr.l		ADJFACT+8(%a6)
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
	bra		t_catch
9307
9308	global		stentoxd
9309stentoxd:
9310#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9311
9312	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
9313	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
9314	mov.l		(%a0),%d1
9315	or.l		&0x00800001,%d1
9316	fadd.s		%d1,%fp0
9317	bra		t_pinx2
9318
9319#########################################################################
9320# smovcr(): returns the ROM constant at the offset specified in d1	#
9321#	    rounded to the mode and precision specified in d0. 		#
9322#									#
9323# INPUT	***************************************************************	#
9324# 	d0 = rnd prec,mode						#
9325#	d1 = ROM offset							#
9326#									#
9327# OUTPUT **************************************************************	#
9328#	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
9329#									#
9330#########################################################################
9331
	global		smovcr
smovcr:
	mov.l		%d1,-(%sp)		# save rom offset for a sec

#
# repack the control bits: on exit from this sequence the upper word of d0
# holds the rounding precision (in bits 3:2 of that word) and the lower
# word holds the rounding mode (bits 1:0). d1 is used as scratch, which is
# why the rom offset was parked on the stack.
#
	lsr.b		&0x4,%d0		# shift ctrl bits to lo
	mov.l		%d0,%d1			# make a copy
	andi.w		&0x3,%d1		# extract rnd mode
	andi.w		&0xc,%d0		# extract rnd prec
	swap		%d0			# put rnd prec in hi
	mov.w		%d1,%d0			# put rnd mode in lo

	mov.l		(%sp)+,%d1		# get rom offset

#
# check range of offset
# (byte compares are signed, so offsets with bit 7 set also end up at z_val)
#
	tst.b		%d1			# if zero, offset is to pi
	beq.b		pi_tbl			# it is pi
	cmpi.b		%d1,&0x0a		# check range $01 - $0a
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x0e		# check range $0b - $0e
	ble.b		sm_tbl			# valid constants in this range
	cmpi.b		%d1,&0x2f		# check range $0f - $2f
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x3f		# check range $30 - $3f
	ble.b		bg_tbl			# valid constants in this range

# anything above $3f falls through to here as well
z_val:
	bra.l		ld_pzero		# return a zero

#
# the answer is PI rounded to the proper precision.
#
# fetch a pointer to the answer table relating to the proper rounding
# mode. d1 is zero here, so the table index computed at no_finx is zero.
#
pi_tbl:
	tst.b		%d0			# is rmode RN?
	bne.b		pi_not_rn		# no
pi_rn:
	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
	bra.w		set_finx
pi_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		pi_rp			# yes
pi_rzrm:
	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
	bra.b		set_finx
pi_rp:
	lea.l		PIRP(%pc),%a0		# load PI RP table addr
	bra.b		set_finx

#
# the answer is one of:
#	$0B	log10(2)	(inexact)
#	$0C	e		(inexact)
#	$0D	log2(e)		(inexact)
#	$0E	log10(e)	(reported as exact: INEX2/AINEX are not set)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
sm_tbl:
	subi.b		&0xb,%d1		# make offset in 0-3 range
	tst.b		%d0			# is rmode RN?
	bne.b		sm_not_rn		# no
sm_rn:
	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
sm_tbl_cont:
	cmpi.b		%d1,&0x2		# is result log10(e)?
	ble.b		set_finx		# no; answer is inexact
	bra.b		no_finx			# yes; answer is exact
sm_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		sm_rp			# yes
sm_rzrm:
	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		sm_tbl_cont
sm_rp:
	lea.l		SMALRP(%pc),%a0		# load RP table addr
	bra.b		sm_tbl_cont

#
# the answer is one of:
#	$30	ln(2)		(inexact)
#	$31	ln(10)		(inexact)
#	$32	10^0		(exact)
#	$33	10^1		(exact)
#	$34	10^2		(exact)
#	$35	10^4		(exact)
#	$36	10^8		(exact)
#	$37	10^16		(exact)
#	$38	10^32		(inexact)
#	$39	10^64		(inexact)
#	$3A	10^128		(inexact)
#	$3B	10^256		(inexact)
#	$3C	10^512		(inexact)
#	$3D	10^1024		(inexact)
#	$3E	10^2048		(inexact)
#	$3F	10^4096		(inexact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
bg_tbl:
	subi.b		&0x30,%d1		# make offset in 0-f range
	tst.b		%d0			# is rmode RN?
	bne.b		bg_not_rn		# no
bg_rn:
	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
bg_tbl_cont:
	cmpi.b		%d1,&0x1		# is offset <= $31?
	ble.b		set_finx		# yes; answer is inexact
	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
	ble.b		no_finx			# yes; answer is exact
	bra.b		set_finx		# no; answer is inexact
bg_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		bg_rp			# yes
bg_rzrm:
	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		bg_tbl_cont
bg_rp:
	lea.l		BIGRP(%pc),%a0		# load RP table addr
	bra.b		bg_tbl_cont

# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
set_finx:
	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
# common tail: a0 = table base, d1 = index of the constant within the table
no_finx:
	mulu.w		&0xc,%d1		# each table entry is 12 bytes
	swap		%d0			# put rnd prec in lo word
	tst.b		%d0			# is precision extended?

	bne.b		not_ext			# no; the entry must go through round

# Precision is extended: the table entry is returned unchanged
	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
	rts

# Precision is single or double
not_ext:
	swap		%d0			# rnd prec in upper word

# call round() to round the answer to the proper precision.
# exponents out of range for single or double DO NOT cause underflow
# or overflow.
# the entry is first copied to FP_SCR1 so that the table is not modified.
	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # copy exponent word
	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # copy hi mantissa longword
	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # copy lo mantissa longword
	mov.l		%d0,%d1			# d1 = rnd prec (hi word), rnd mode (lo word)
	clr.l		%d0			# clear g,r,s
	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
	clr.w		LOCAL_SGN(%a0)		# sign always positive
	bsr.l		_round			# round the mantissa

	fmovm.x		(%a0),&0x80		# return rounded result in fp0
	rts
9490
9491	align		0x4
9492
9493PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
9494PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
9495PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
9496
9497SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
9498	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
9499	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
9500	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9501	long		0x00000000,0x00000000,0x00000000	# 0.0
9502
9503SMALRZRM:
9504	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
9505	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
9506	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
9507	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9508	long		0x00000000,0x00000000,0x00000000	# 0.0
9509
9510SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
9511	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
9512	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
9513	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
9514	long		0x00000000,0x00000000,0x00000000	# 0.0
9515
9516BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
9517	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
9518
9519	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9520	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9521	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9522	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9523	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9524	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9525	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
9526	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
9527	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
9528	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
9529	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
9530	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
9531	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
9532	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
9533
9534BIGRZRM:
9535	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
9536	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)
9537
9538	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9539	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9540	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9541	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9542	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9543	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9544	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
9545	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
9546	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
9547	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
9548	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
9549	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
9550	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
9551	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
9552
9553BIGRP:					# 12-byte extended constants; label suggests RP rounding use (TODO confirm)
9554	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
9555	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
9556# powers of ten; entries that differ from the BIGRZRM table are one mantissa LSB larger
9557	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
9558	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
9559	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
9560	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
9561	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
9562	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
9563	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
9564	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
9565	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
9566	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
9567	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
9568	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
9569	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
9570	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
9571
9572#########################################################################
9573# sscale(): computes the destination operand scaled by the source	#
9574#	    operand. If the absolute value of the source operand is	#
9575#	    >= 2^14, an overflow or underflow is returned.		#
9576#									#
9577# INPUT *************************************************************** #
9578#	a0  = pointer to double-extended source operand X		#
9579#	a1  = pointer to double-extended destination operand Y		#
9580#	d0  = control bits (pushed on entry, later loaded into fpcr)	#
9581# OUTPUT ************************************************************** #
9582#	fp0 =  scale(X,Y), i.e. Y * 2^int(X) on the in-range path	#
9583#									#
9584#########################################################################
9585
9586set	SIGN,		L_SCR1		# scratch byte written with the dst sign flag
9587
9588	global		sscale
9589sscale:
9590	mov.l		%d0,-(%sp)		# store off ctrl bits for now
9591
9592	mov.w		DST_EX(%a1),%d1		# get dst exponent
9593	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
9594	andi.l		&0x00007fff,%d1		# strip sign from dst exp
9595
9596	mov.w		SRC_EX(%a0),%d0		# check src bounds
9597	andi.w		&0x7fff,%d0		# clr src sign bit
9598	cmpi.w		%d0,&0x3fff		# is src exp below 0x3fff (|src| < 1)?
9599	blt.w		src_small		# yes
9600	cmpi.w		%d0,&0x400c		# no; is src exp above 0x400c (too big)?
9601	bgt.w		src_out			# yes
9602
9603#
9604# Source is within 2^14 range.
9605#
9606src_ok:
9607	fintrz.x	SRC(%a0),%fp0		# calc int of src
9608	fmov.l		%fp0,%d0		# int src to d0
9609# don't want any accrued bits from the fintrz showing up later since
9610# we may need to read the fpsr for the last fp op in t_catch2().
9611	fmov.l		&0x0,%fpsr
9612
9613	tst.b		DST_HI(%a1)		# is dst denormalized?
9614	bmi.b		sok_norm		# no; top mantissa bit is set
9615
9616# the dst is a DENORM. normalize the DENORM and add the adjustment to
9617# the src value. then, jump to the norm part of the routine.
9618sok_dnrm:
9619	mov.l		%d0,-(%sp)		# save src for now
9620
9621	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9622	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
9623	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
9624
9625	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
9626	bsr.l		norm			# normalize the DENORM
9627	neg.l		%d0			# negate norm's d0 result (presumably the shift count; confirm)
9628	add.l		(%sp)+,%d0		# add adjustment to src
9629
9630	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM
9631
9632	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
9633	bge.b		sok_norm2		# thank goodness no
9634
9635# the multiply factor that we're trying to create should be a denorm
9636# for the multiply to work. therefore, we're going to actually do a
9637# multiply with a denorm which will cause an unimplemented data type
9638# exception to be put into the machine which will be caught and corrected
9639# later. we don't do this with the DENORMs above because this method
9640# is slower. but, don't fret, I don't see it being used much either.
9641	fmov.l		(%sp)+,%fpcr		# restore user fpcr
9642	mov.l		&0x80000000,%d1		# load normalized mantissa
9643	subi.l		&-0x3fff,%d0		# how many should we shift?
9644	neg.l		%d0			# make it positive
9645	cmpi.b		%d0,&0x20		# is it >= 32?
9646	bge.b		sok_dnrm_32		# yes
9647	lsr.l		%d0,%d1			# no; bit stays in upper lw
9648	clr.l		-(%sp)			# insert zero low mantissa
9649	mov.l		%d1,-(%sp)		# insert new high mantissa
9650	clr.l		-(%sp)			# make zero exponent
9651	bra.b		sok_norm_cont
9652sok_dnrm_32:
9653	subi.b		&0x20,%d0		# get shift count
9654	lsr.l		%d0,%d1			# make low mantissa longword
9655	mov.l		%d1,-(%sp)		# insert new low mantissa
9656	clr.l		-(%sp)			# insert zero high mantissa
9657	clr.l		-(%sp)			# make zero exponent
9658	bra.b		sok_norm_cont
9659
9660# the src will force the dst to a DENORM value or worse. so, let's
9661# create an fp multiply that will create the result.
9662sok_norm:
9663	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
9664sok_norm2:
9665	fmov.l		(%sp)+,%fpcr		# restore user fpcr
9666
9667	addi.w		&0x3fff,%d0		# turn src amt into exp value
9668	swap		%d0			# put exponent in high word
9669	clr.l		-(%sp)			# insert zero lo mantissa (pushed first)
9670	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
9671	mov.l		%d0,-(%sp)		# insert new exponent (pushed last, lowest address)
9672
9673sok_norm_cont:
9674	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
9675	mov.b		&FMUL_OP,%d1		# last inst is MUL
9676	fmul.x		(%sp)+,%fp0		# do the multiply
9677	bra		t_catch2		# catch any exceptions
9678
9679#
9680# Source is outside of 2^14 range.  Test the sign and branch
9681# to the appropriate exception handler.
9682#
9683src_out:
9684	mov.l		(%sp)+,%d0		# restore ctrl bits
9685	exg		%a0,%a1			# swap src,dst ptrs
9686	tst.b		SRC_EX(%a1)		# is src negative?
9687	bmi		t_unfl			# yes; underflow
9688	bra		t_ovfl_sc		# no; overflow
9689
9690#
9691# The source input is below 1, so we check for denormalized numbers
9692# and set unfl.
9693#
9694src_small:
9695	tst.b		DST_HI(%a1)		# is dst denormalized?
9696	bpl.b		ssmall_done		# yes
9697
9698	mov.l		(%sp)+,%d0		# pop saved ctrl bits
9699	fmov.l		%d0,%fpcr		# no; load control bits
9700	mov.b		&FMOV_OP,%d1		# last inst is MOVE
9701	fmov.x		DST(%a1),%fp0		# simply return dest
9702	bra		t_catch2
9703ssmall_done:
9704	mov.l		(%sp)+,%d0		# load control bits into d0
9705	mov.l		%a1,%a0			# pass ptr to dst
9706	bra		t_resdnrm
9707
9708#########################################################################
9709# smod(): computes the fp MOD of the input values X,Y.			#
9710# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
9711#									#
9712# INPUT *************************************************************** #
9713#	a0 = pointer to extended precision src operand (Y below)	#
9714#	a1 = pointer to extended precision dst operand (X below)	#
9715#	d0 = round precision,mode					#
9716#									#
9717# 	The input operands X and Y can be either normalized or 		#
9718#	denormalized.							#
9719#									#
9720# OUTPUT ************************************************************** #
9721#      fp0 = FREM(X,Y) or FMOD(X,Y)					#
9722#									#
9723# ALGORITHM *********************************************************** #
9724#									#
9725#       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
9726#                signY := sign(Y), X := |X|, Y := |Y|, 			#
9727#                signQ := signX EOR signY. Record whether MOD or REM	#
9728#                is requested.						#
9729#									#
9730#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
9731#                If (L < 0) then					#
9732#                   R := X, go to Step 4.				#
9733#                else							#
9734#                   R := 2^(-L)X, j := L.				#
9735#                endif							#
9736#									#
9737#       Step 3.  Perform MOD(X,Y)					#
9738#            3.1 If R = Y, go to Step 9.				#
9739#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
9740#            3.3 If j = 0, go to Step 4.				#
9741#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
9742#                Step 3.1.						#
9743#									#
9744#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
9745#                Last_Subtract := false (used in Step 7 below). If	#
9746#                MOD is requested, go to Step 6. 			#
9747#									#
9748#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
9749#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
9750#                Step 6.						#
9751#            5.2 If R > Y/2, then { set Last_Subtract := true,		#
9752#                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
9753#            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
9754#                then { Q := Q + 1, signX := -signX }.			#
9755#									#
9756#       Step 6.  R := signX*R.						#
9757#									#
9758#       Step 7.  If Last_Subtract = true, R := R - Y.			#
9759#									#
9760#       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
9761#									#
9762#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
9763#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
9764#                R := 0. Return signQ, last 7 bits of Q, and R.		#
9765#									#
9766#########################################################################
9767
9768	set		Mod_Flag,L_SCR3		# byte: cleared by smod, set to 1 by srem
9769	set		Sc_Flag,L_SCR3+1	# byte: nonzero => multiply result by Scale at exit
9770
9771	set		SignY,L_SCR2		# word: sign/exponent word of Y as read from the src
9772	set		SignX,L_SCR2+2		# word: sign/exponent word of X as read from the dst
9773	set		SignQ,L_SCR3+2		# word: 0x8000 when the signs of X and Y differ, else 0
9774
9775	set		Y,FP_SCR0		# scratch extended operand holding |Y|
9776	set		Y_Hi,Y+4		# hi mantissa longword of Y
9777	set		Y_Lo,Y+8		# lo mantissa longword of Y
9778
9779	set		R,FP_SCR1		# scratch extended operand holding the remainder R
9780	set		R_Hi,R+4		# hi mantissa longword of R
9781	set		R_Lo,R+8		# lo mantissa longword of R
9782
9783Scale:					# extended operand: exponent 0x0001, mantissa 0x80000000_00000000
9784	long		0x00010000,0x80000000,0x00000000,0x00000000
9785
9786	global		smod
9787smod:
9788	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
9789	mov.l		%d0,-(%sp)		# save ctrl bits
9790	clr.b		Mod_Flag(%a6)		# Mod_Flag = 0: MOD requested
9791	bra.b		Mod_Rem			# join the code shared with srem
9792
9793	global		srem
9794srem:
9795	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
9796	mov.l		%d0,-(%sp)		# save ctrl bits
9797	mov.b		&0x1,Mod_Flag(%a6)	# Mod_Flag = 1: REM requested; falls into Mod_Rem
9798
9799Mod_Rem:
9800#..Save sign of X and Y
9801	movm.l		&0x3f00,-(%sp)		# save data registers d2-d7
9802	mov.w		SRC_EX(%a0),%d3		# sign/exponent word of Y (the src operand)
9803	mov.w		%d3,SignY(%a6)		# remember it for sign(Y)
9804	and.l		&0x00007FFF,%d3		# Y := |Y|
9805
9806#
9807	mov.l		SRC_HI(%a0),%d4
9808	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|
9809
9810	tst.l		%d3			# nonzero exponent => no normalization needed
9811	bne.b		Y_Normal
9812
9813	mov.l		&0x00003FFE,%d3		# $3FFD + 1
9814	tst.l		%d4			# any bits set in hi(Y)?
9815	bne.b		HiY_not0
9816
9817HiY_0:
9818	mov.l		%d5,%d4			# hi(Y) = 0: move lo(Y) up one longword
9819	clr.l		%d5
9820	sub.l		&32,%d3			# ...and take 32 off the exponent
9821	clr.l		%d6
9822	bfffo		%d4{&0:&32},%d6		# D6 = offset of first set bit in D4
9823	lsl.l		%d6,%d4			# bring that bit up to the msb
9824	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
9825#	                                        ...with bias $7FFD
9826	bra.b		Chk_X
9827
9828HiY_not0:
9829	clr.l		%d6
9830	bfffo		%d4{&0:&32},%d6		# D6 = offset of first set bit in D4
9831	sub.l		%d6,%d3			# exponent drops by the shift amount
9832	lsl.l		%d6,%d4
9833	mov.l		%d5,%d7			# a copy of D5
9834	lsl.l		%d6,%d5
9835	neg.l		%d6
9836	add.l		&32,%d6			# D6 = 32 - shift
9837	lsr.l		%d6,%d7			# bits of lo(Y) that move into hi(Y)
9838	or.l		%d7,%d4			# (D3,D4,D5) normalized
9839#                                       ...with bias $7FFD
9840	bra.b		Chk_X
9841
9842Y_Normal:
9843	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
9844#                                       ...with bias $7FFD
9845
9846Chk_X:
9847	mov.w		DST_EX(%a1),%d0		# sign/exponent word of X (the dst operand)
9848	mov.w		%d0,SignX(%a6)		# remember it for sign(X)
9849	mov.w		SignY(%a6),%d1
9850	eor.l		%d0,%d1			# bit 15 = sign(X) EOR sign(Y)
9851	and.l		&0x00008000,%d1		# keep only that sign bit
9852	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
9853	and.l		&0x00007FFF,%d0		# X := |X|
9854	mov.l		DST_HI(%a1),%d1
9855	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
9856	tst.l		%d0			# nonzero exponent => no normalization needed
9857	bne.b		X_Normal
9858	mov.l		&0x00003FFE,%d0		# same starting exponent as used for Y above
9859	tst.l		%d1			# any bits set in hi(X)?
9860	bne.b		HiX_not0
9861
9862HiX_0:
9863	mov.l		%d2,%d1			# hi(X) = 0: move lo(X) up one longword
9864	clr.l		%d2
9865	sub.l		&32,%d0			# ...and take 32 off the exponent
9866	clr.l		%d6
9867	bfffo		%d1{&0:&32},%d6		# D6 = offset of first set bit in D1
9868	lsl.l		%d6,%d1			# bring that bit up to the msb
9869	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
9870#                                       ...with bias $7FFD
9871	bra.b		Init
9872
9873HiX_not0:
9874	clr.l		%d6
9875	bfffo		%d1{&0:&32},%d6		# D6 = offset of first set bit in D1
9876	sub.l		%d6,%d0			# exponent drops by the shift amount
9877	lsl.l		%d6,%d1
9878	mov.l		%d2,%d7			# a copy of D2
9879	lsl.l		%d6,%d2
9880	neg.l		%d6
9881	add.l		&32,%d6			# D6 = 32 - shift
9882	lsr.l		%d6,%d7			# bits of lo(X) that move into hi(X)
9883	or.l		%d7,%d1			# (D0,D1,D2) normalized
9884#                                       ...with bias $7FFD
9885	bra.b		Init
9886
9887X_Normal:
9888	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
9889#                                       ...with bias $7FFD
9890
9891Init:
9892# Set up the shift-and-subtract loop: D0 = j, D3 = Q, A1 = k, D6 = carry.
9893	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
9894	mov.l		%d0,-(%sp)		# save biased exp(X)
9895	sub.l		%d3,%d0			# L := expo(X)-expo(Y)
9896
9897	clr.l		%d6			# D6 := carry <- 0
9898	clr.l		%d3			# D3 is Q
9899	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0
9900
9901#..(Carry,D1,D2) is R
9902	tst.l		%d0			# is L >= 0?
9903	bge.b		Mod_Loop_pre		# yes; run the loop
9904
9905#..expo(X) < expo(Y). Thus X = mod(X,Y)
9906#
9907	mov.l		(%sp)+,%d0		# restore d0
9908	bra.w		Get_Mod
9909
9910Mod_Loop_pre:
9911	addq.l		&0x4,%sp		# erase exp(X)
9912#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
9913Mod_Loop:
9914	tst.l		%d6			# test carry bit
9915	bgt.b		R_GT_Y			# carry set: go subtract Y
9916
9917#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9918	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
9919	bne.b		R_NE_Y
9920	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
9921	bne.b		R_NE_Y
9922
9923#..At this point, R = Y
9924	bra.w		Rem_is_0
9925
9926R_NE_Y:
9927#..use the borrow of the previous compare
9928	bcs.b		R_LT_Y			# borrow is set iff R < Y
9929
9930R_GT_Y:
9931#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9932#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9933	sub.l		%d5,%d2			# lo(R) - lo(Y)
9934	subx.l		%d4,%d1			# hi(R) - hi(Y)
9935	clr.l		%d6			# clear carry
9936	addq.l		&1,%d3			# Q := Q + 1
9937
9938R_LT_Y:
9939#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9940	tst.l		%d0			# see if j = 0.
9941	beq.b		PostLoop
9942
9943	add.l		%d3,%d3			# Q := 2Q
9944	add.l		%d2,%d2			# lo(R) = 2lo(R)
9945	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
9946	scs		%d6			# set Carry if 2(R) overflows
9947	addq.l		&1,%a1			# k := k+1
9948	subq.l		&1,%d0			# j := j - 1
9949#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9950
9951	bra.b		Mod_Loop
9952
9953PostLoop:
9954#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9955
9956#..normalize R.
9957	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
9958	tst.l		%d1			# any bits set in hi(R)?
9959	bne.b		HiR_not0
9960
9961HiR_0:
9962	mov.l		%d2,%d1			# hi(R) = 0: move lo(R) up one longword
9963	clr.l		%d2
9964	sub.l		&32,%d0			# ...and take 32 off the exponent
9965	clr.l		%d6
9966	bfffo		%d1{&0:&32},%d6		# D6 = offset of first set bit in D1
9967	lsl.l		%d6,%d1			# bring that bit up to the msb
9968	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
9969#                                       ...with bias $7FFD
9970	bra.b		Get_Mod
9971
9972HiR_not0:
9973	clr.l		%d6
9974	bfffo		%d1{&0:&32},%d6		# D6 = offset of first set bit in D1
9975	bmi.b		Get_Mod			# already normalized
9976	sub.l		%d6,%d0			# exponent drops by the shift amount
9977	lsl.l		%d6,%d1
9978	mov.l		%d2,%d7			# a copy of D2
9979	lsl.l		%d6,%d2
9980	neg.l		%d6
9981	add.l		&32,%d6			# D6 = 32 - shift
9982	lsr.l		%d6,%d7			# bits of lo(R) that move into hi(R)
9983	or.l		%d7,%d1			# (D0,D1,D2) normalized
9984
9985# Build R and Y in memory; small exponents keep the extra $3FFE bias (Sc_Flag).
9986Get_Mod:
9987	cmp.l		%d0,&0x000041FE		# is biased expo(R) >= $41FE?
9988	bge.b		No_Scale		# yes; the extra bias can be removed now
9989Do_Scale:
9990	mov.w		%d0,R(%a6)		# store expo(R) with the extra bias left in
9991	mov.l		%d1,R_Hi(%a6)
9992	mov.l		%d2,R_Lo(%a6)
9993	mov.l		L_SCR1(%a6),%d6		# biased expo(Y)
9994	mov.w		%d6,Y(%a6)		# store expo(Y) with the extra bias left in
9995	mov.l		%d4,Y_Hi(%a6)
9996	mov.l		%d5,Y_Lo(%a6)
9997	fmov.x		R(%a6),%fp0		# no exception
9998	mov.b		&1,Sc_Flag(%a6)		# result must be multiplied by Scale at exit
9999	bra.b		ModOrRem
10000No_Scale:
10001	mov.l		%d1,R_Hi(%a6)
10002	mov.l		%d2,R_Lo(%a6)
10003	sub.l		&0x3FFE,%d0		# remove the extra bias from expo(R)
10004	mov.w		%d0,R(%a6)
10005	mov.l		L_SCR1(%a6),%d6
10006	sub.l		&0x3FFE,%d6		# remove the extra bias from expo(Y)
10007	mov.l		%d6,L_SCR1(%a6)		# keep L_SCR1 consistent with D0 for ModOrRem
10008	fmov.x		R(%a6),%fp0		# fp0 = R
10009	mov.w		%d6,Y(%a6)
10010	mov.l		%d4,Y_Hi(%a6)
10011	mov.l		%d5,Y_Lo(%a6)
10012	clr.b		Sc_Flag(%a6)		# no scaling multiply needed at exit
10013
10014# For REM (Mod_Flag != 0) compare R with Y/2 and subtract Y once more if needed.
10015ModOrRem:
10016	tst.b		Mod_Flag(%a6)		# MOD requested?
10017	beq.b		Fix_Sign		# yes; R is already the answer
10018
10019	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
10020	subq.l		&1,%d6			# biased expo(Y/2)
10021	cmp.l		%d0,%d6			# expo(R) vs expo(Y/2)
10022	blt.b		Fix_Sign		# smaller exponent: R < Y/2
10023	bgt.b		Last_Sub		# larger exponent: R > Y/2
10024
10025	cmp.l		%d1,%d4			# same exponent: compare hi mantissas
10026	bne.b		Not_EQ
10027	cmp.l		%d2,%d5			# ...then lo mantissas
10028	bne.b		Not_EQ
10029	bra.w		Tie_Case		# R = Y/2 exactly
10030
10031Not_EQ:
10032	bcs.b		Fix_Sign		# borrow from the compare: R < Y/2
10033
10034Last_Sub:
10035#
10036	fsub.x		Y(%a6),%fp0		# no exceptions
10037	addq.l		&1,%d3			# Q := Q + 1
10038
10039#
10040Fix_Sign:
10041#..Get sign of X
10042	mov.w		SignX(%a6),%d6		# sets N from the saved sign bit of X
10043	bge.b		Get_Q			# X was positive: leave fp0 alone
10044	fneg.x		%fp0			# X was negative: negate the result
10045
10046#..Get Q
10047#
10048Get_Q:
10049	clr.l		%d6
10050	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
10051	mov.l		&8,%d7
10052	lsr.l		%d7,%d6			# move the sign from bit 15 down to bit 7
10053	and.l		&0x0000007F,%d3		# 7 bits of Q
10054	or.l		%d6,%d3			# sign and bits of Q
10055#	swap		%d3
10056#	fmov.l		%fpsr,%d6
10057#	and.l		&0xFF00FFFF,%d6
10058#	or.l		%d3,%d6
10059#	fmov.l		%d6,%fpsr		# put Q in fpsr
10060	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr
10061
10062# Common exit: restore registers and fpcr, then let t_catch2 check the last op.
10063Restore:
10064	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
10065	mov.l		(%sp)+,%d0		# ctrl bits pushed at smod/srem entry
10066	fmov.l		%d0,%fpcr		# make them the active fpcr
10067	tst.b		Sc_Flag(%a6)		# was the extra bias left in the result?
10068	beq.b		Finish			# no
10069	mov.b		&FMUL_OP,%d1		# last inst is MUL
10070	fmul.x		Scale(%pc),%fp0		# may cause underflow
10071	bra		t_catch2
10072# the '040 package did this apparently to see if the dst operand for the
10073# preceding fmul was a denorm. but, it better not have been since the
10074# algorithm just got done playing with fp0 and expected no exceptions
10075# as a result. trust me...
10076#	bra		t_avoid_unsupp		# check for denorm as a
10077#						;result of the scaling
10078
10079Finish:
10080	mov.b		&FMOV_OP,%d1		# last inst is MOVE
10081	fmov.x		%fp0,%fp0		# capture exceptions & round
10082	bra		t_catch2
10083
10084Rem_is_0:
10085#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10086	addq.l		&1,%d3			# Q := Q + 1
10087	cmp.l		%d0,&8			# D0 is j
10088	bge.b		Q_Big			# j >= 8: report Q as zero
10089
10090	lsl.l		%d0,%d3			# Q := 2^j * (Q+1)
10091	bra.b		Set_R_0
10092
10093Q_Big:
10094	clr.l		%d3
10095
10096Set_R_0:
10097	fmov.s		&0x00000000,%fp0	# R := +0; Fix_Sign applies the sign of X
10098	clr.b		Sc_Flag(%a6)		# no scaling multiply for a zero result
10099	bra.w		Fix_Sign
10100
10101Tie_Case:
10102#..Check parity of Q
10103	mov.l		%d3,%d6
10104	and.l		&0x00000001,%d6		# isolate the low bit of Q
10105	tst.l		%d6
10106	beq.w		Fix_Sign		# Q is even
10107
10108#..Q is odd, Q := Q + 1, signX := -signX
10109	addq.l		&1,%d3
10110	mov.w		SignX(%a6),%d6
10111	eor.l		&0x00008000,%d6		# flip the saved sign bit of X
10112	mov.w		%d6,SignX(%a6)
10113	bra.w		Fix_Sign
10114
10115qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff	# default NAN operand loaded by t_operr
10116
10117#########################################################################
10118# XDEF ****************************************************************	#
10119#	t_dz(): Handle DZ exception during transcendental emulation.	#
10120#	        Sets N bit according to sign of source operand.		#
10121#	t_dz2(): Handle DZ exception during transcendental emulation.	#
10122#		 Sets N bit always.					#
10123#									#
10124# XREF ****************************************************************	#
10125#	None								#
10126#									#
10127# INPUT ***************************************************************	#
10128#	a0 = pointer to source operand					#
10129# 									#
10130# OUTPUT **************************************************************	#
10131#	fp0 = default result						#
10132#									#
10133# ALGORITHM ***********************************************************	#
10134#	- Store properly signed INF into fp0.				#
10135#	- Set FPSR exception status dz bit, ccode inf bit, and 		#
10136#	  accrued dz bit.						#
10137#									#
10138#########################################################################
10139
10140	global		t_dz
10141t_dz:
10142	tst.b		SRC_EX(%a0) 		# is src negative?
10143	bmi.b		t_dz2			# yes
10144
10145dz_pinf:
10146	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
10147	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10148	rts
10149
10150	global		t_dz2
10151t_dz2:
10152	fmov.s		&0xff800000,%fp0	# return -INF in fp0
10153	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10154	rts
10155
10156#################################################################
10157# OPERR exception:						#
10158#	- set FPSR exception status operr bit, condition code 	#
10159#	  nan bit; Store default NAN into fp0			#
10160#################################################################
10161	global		t_operr
10162t_operr:
10163	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10164	fmovm.x		qnan(%pc),&0x80		# return default NAN (the qnan constant) in fp0
10165	rts
10166
10167#################################################################
10168# Extended DENORM:						#
10169# 	- For all functions that have a denormalized input and	#
10170#	  that f(x)=x, this is the entry point.			#
10171#	- we only return the EXOP here if either underflow or	#
10172#	  inexact is enabled.					#
10173#################################################################
10174# In: a0 = ptr to the denorm operand, d0 = rnd prec,mode. Out: fp0 (and fp1 if EXOP).
10175# Entry point for scale w/ extended denorm. The function does
10176# NOT set INEX2/AUNFL/AINEX.
10177	global		t_resdnrm
10178t_resdnrm:
10179	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
10180	bra.b		xdnrm_con
10181
10182	global		t_extdnrm
10183t_extdnrm:
10184	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10185
10186xdnrm_con:
10187	mov.l		%a0,%a1			# make copy of src ptr
10188	mov.l		%d0,%d1			# make copy of rnd prec,mode
10189	andi.b		&0xc0,%d1		# extended precision?
10190	bne.b		xdnrm_sd		# no
10191
10192# result precision is extended.
10193	tst.b		LOCAL_EX(%a0)		# is denorm negative?
10194	bpl.b		xdnrm_exit		# no
10195
10196	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
10197	bra.b		xdnrm_exit
10198
10199# result precision is single or double
10200xdnrm_sd:
10201	mov.l		%a1,-(%sp)		# keep the operand ptr across the call
10202	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
10203	smi.b		%d1			# set d1 accordingly
10204	bsr.l		unf_sub			# a0 <- default result; NOTE(review): unf_sub also ORs unfinx_mask into USER_FPSR
10205	mov.l		(%sp)+,%a1
10206xdnrm_exit:
10207	fmovm.x		(%a0),&0x80		# return default result in fp0
10208
10209	mov.b		FPCR_ENABLE(%a6),%d0
10210	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
10211	bne.b		xdnrm_ena		# yes
10212	rts
10213
10214################
10215# unfl enabled #
10216################
10217# we have a DENORM that needs to be converted into an EXOP.
10218# so, normalize the mantissa, add 0x6000 to the new exponent,
10219# and return the result in fp1.
10220xdnrm_ena:
10221	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)	# copy the operand so norm works on FP_SCR0
10222	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10223	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10224
10225	lea		FP_SCR0(%a6),%a0
10226	bsr.l		norm			# normalize mantissa
10227	addi.l		&0x6000,%d0		# add extra bias
10228	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
10229	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
10230
10231	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10232	rts
10233
10234#################################################################
10235# UNFL exception:						#
10236# 	- This routine is for cases where even an EXOP isn't	#
10237#  	  large enough to hold the range of this result.	#
10238#	  In such a case, the EXOP equals zero.			#
10239#  	- Return the default result to the proper precision 	#
10240#	  with the sign of this result being the same as that	#
10241#	  of the src operand.					#
10242# 	- t_unfl2() is provided to force the result sign to 	#
10243#	  positive which is the desired result for fetox().	#
10244#################################################################
10245	global		t_unfl
10246t_unfl:
10247	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10248
10249	tst.b		(%a0)			# is result pos or neg?
10250	smi.b		%d1			# set d1 accordingly
10251	bsr.l		unf_sub			# calc default unfl result
10252	fmovm.x		(%a0),&0x80		# return default result in fp0
10253
10254	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10255	rts
10256
10257# t_unfl2 ALWAYS tells unf_sub to create a positive result
10258	global		t_unfl2
10259t_unfl2:
10260	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10261
10262	sf.b		%d1			# set d1 to represent positive
10263	bsr.l		unf_sub			# calc default unfl result
10264	fmovm.x		(%a0),&0x80		# return default result in fp0
10265
10266	fmov.s		&0x0000000,%fp1		# return EXOP in fp1
10267	rts
10268
10269#################################################################
10270# OVFL exception:						#
10271# 	- This routine is for cases where even an EXOP isn't	#
10272#  	  large enough to hold the range of this result.	#
10273# 	- Return the default result to the proper precision 	#
10274#	  with the sign of this result being the same as that 	#
10275#	  of the src operand.					#
10276# 	- t_ovfl2() is provided to force the result sign to 	#
10277#	  positive which is the desired result for fcosh().	#
10278# 	- t_ovfl_sc() is provided for scale() which only sets 	#
10279#	  the inexact bits if the number is inexact for the 	#
10280#	  precision indicated.					#
10281#################################################################
10282# In: a0 = ptr to operand supplying the sign, d0 = rnd prec,mode.
10283	global		t_ovfl_sc
10284t_ovfl_sc:
10285	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10286
10287	mov.b		%d0,%d1			# fetch rnd mode/prec
10288	andi.b		&0xc0,%d1		# extract rnd prec
10289	beq.b		ovfl_work		# prec is extended
10290
10291	tst.b		LOCAL_HI(%a0)		# is dst a DENORM?
10292	bmi.b		ovfl_sc_norm		# no
10293
10294# dst op is a DENORM. we have to normalize the mantissa to see if the
10295# result would be inexact for the given precision. make a copy of the
10296# dst so we don't screw up the version passed to us.
10297	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10298	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10299	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10300	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
10301	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
10302	bsr.l		norm			# normalize mantissa
10303	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0
10304
10305ovfl_sc_norm:
10306	cmpi.b		%d1,&0x40		# is prec field 0x40 (the sgl case below)?
10307	bne.b		ovfl_sc_dbl		# no; handle as dbl
10308ovfl_sc_sgl:
10309	tst.l		LOCAL_LO(%a0)		# is lo lw of mantissa set?
10310	bne.b		ovfl_sc_inx		# yes
10311	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
10312	bne.b		ovfl_sc_inx		# yes
10313	bra.b		ovfl_work		# don't set INEX2
10314ovfl_sc_dbl:
10315	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
10316	andi.l		&0x7ff,%d1		# dbl mantissa set?
10317	beq.b		ovfl_work		# no; don't set INEX2
10318ovfl_sc_inx:
10319	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
10320	bra.b		ovfl_work		# continue
10321
10322	global		t_ovfl
10323t_ovfl:
10324	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10325
10326ovfl_work:
10327	tst.b		LOCAL_EX(%a0)		# what is the sign?
10328	smi.b		%d1			# set d1 accordingly
10329	bsr.l		ovf_res			# calc default ovfl result
10330	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
10331	fmovm.x		(%a0),&0x80		# return default result in fp0
10332
10333	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10334	rts
10335
10336# t_ovfl2 ALWAYS tells ovf_res to create a positive result
10337	global		t_ovfl2
10338t_ovfl2:
10339	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10340
10341	sf.b		%d1			# clear sign flag for positive
10342	bsr.l		ovf_res			# calc default ovfl result
10343	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
10344	fmovm.x		(%a0),&0x80		# return default result in fp0
10345
10346	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
10347	rts
10348
10349#################################################################
10350# t_catch(): 							#
10351#	- the last operation of a transcendental emulation	#
10352# 	  routine may have caused an underflow or overflow. 	#
10353# 	  we find out if this occurred by doing an fsave and 	#
10354#	  checking the exception bit. if one did occur, then we	#
10355#	  jump to fgen_except() which creates the default	#
10356#	  result and EXOP for us.				#
10357#################################################################
10358	global		t_catch
10359t_catch:
10360# no exception pending: execution falls through into t_inx2 below
10361	fsave		-(%sp)			# push the FPU state frame
10362	tst.b		0x2(%sp)		# test the exception bit in the frame
10363	bmi.b		catch			# set; go build default result/EXOP
10364	add.l		&0xc,%sp		# clear; discard the 12-byte frame
10365
10366#################################################################
10367# INEX2 exception:						#
10368#	- The inex2 and ainex bits are set.			#
10369#################################################################
10370	global		t_inx2
10371t_inx2:
10372	fblt.w		t_minx2			# FP ccodes say "less than": also set N
10373	fbeq.w		inx2_zero		# FP ccodes say "equal": also set Z
10374
10375	global		t_pinx2
10376t_pinx2:
10377	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10378	rts
10379
10380	global		t_minx2
10381t_minx2:
10382	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10383	rts
10384
10385inx2_zero:
10386	mov.b		&z_bmask,FPSR_CC(%a6)	# ccode byte := 'Z' only
10387	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10388	rts
10389
10390# an underflow or overflow exception occurred.
10391# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10392catch:
10393	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)	# set INEX2/AINEX
10394catch2:
10395	bsr.l		fgen_except		# entered with the fsave frame still on the stack
10396	add.l		&0xc,%sp		# discard the 12-byte fsave frame
10397	rts
10398
10399	global		t_catch2
10400t_catch2:
10401# like t_catch, but no INEX2/AINEX is forced; the live fpsr is merged instead
10402	fsave		-(%sp)			# push the FPU state frame
10403
10404	tst.b		0x2(%sp)		# test the exception bit in the frame
10405	bmi.b		catch2			# set; go call fgen_except
10406	add.l		&0xc,%sp		# clear; discard the 12-byte frame
10407
10408	fmov.l		%fpsr,%d0		# status left by the last fp op
10409	or.l		%d0,USER_FPSR(%a6)	# fold it into the user's FPSR image
10410
10411	rts
10412
10413#########################################################################
10414
10415#########################################################################
10416# unf_sub(): underflow default result calculation for transcendentals	#
10417#									#
10418# INPUT:								#
10419# 	d0   : rnd mode,precision					#
10420# 	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
10421# OUTPUT:								#
10422#	a0   : points to result (in instruction memory)			#
10423#########################################################################
10424unf_sub:
10425	ori.l		&unfinx_mask,USER_FPSR(%a6)	# set UNFL/INEX2/AUNFL/AINEX
10426
10427	andi.w		&0x10,%d1		# keep sign bit in 4th spot
10428
10429	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
10430	andi.b		&0xf,%d0		# keep the 4-bit {prec,mode} field
10431	or.b		%d1,%d0			# concat {sgn,prec,mode}
10432# NOTE(review): both indexes below use the full word; assumes d0 bits 15:8 are zero on entry
10433	mov.l		%d0,%d1			# make a copy
10434	lsl.b		&0x1,%d1		# mult index 2 by 2
10435
10436	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10437	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10438	rts
10439
10440tbl_unf_cc:				# ccode byte per unf_sub index; one row per group of four indexes
10441	byte		0x4, 0x4, 0x4, 0x0	# index  0-3
10442	byte		0x4, 0x4, 0x4, 0x0	# index  4-7
10443	byte		0x4, 0x4, 0x4, 0x0	# index  8-11
10444	byte		0x0, 0x0, 0x0, 0x0	# index 12-15
10445	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# index 16-19 (sign bit set)
10446	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# index 20-23 (sign bit set)
10447	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# index 24-27 (sign bit set)
10448
10449tbl_unf_result:				# 16-byte entries, addressed by unf_sub as index*16
10450	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10451	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10452	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10453	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10454
10455	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10456	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10457	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10458	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10459
10460	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10461	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10462	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10463	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10464# index 12-15: all-zero filler entries
10465	long		0x0,0x0,0x0,0x0
10466	long		0x0,0x0,0x0,0x0
10467	long		0x0,0x0,0x0,0x0
10468	long		0x0,0x0,0x0,0x0
10469# index 16 and up: the same layout with the sign bit set
10470	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10471	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10472	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10473	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10474
10475	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10476	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10477	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10478	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10479
10480	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10481	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10482	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10483	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10484
10485############################################################
10486
10487#########################################################################
10488# src_zero(): Return signed zero according to sign of src operand.	#
10489#########################################################################
10490	global		src_zero
10491src_zero:
10492	tst.b		SRC_EX(%a0)		# get sign of src operand
10493	bmi.b		ld_mzero		# if neg, load neg zero
10494
10495#
10496# ld_pzero(): return a positive zero. (src_zero falls through to here.)
10497#
10498	global		ld_pzero
10499ld_pzero:
10500	fmov.s		&0x00000000,%fp0	# load +0
10501	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10502	rts
10503
10504# ld_mzero(): return a negative zero.
10505	global		ld_mzero
10506ld_mzero:
10507	fmov.s		&0x80000000,%fp0	# load -0
10508	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10509	rts
10510
10511#########################################################################
10512# dst_zero(): Hand back +0 or -0, matching the sign of the dst operand.	#
10513#########################################################################
10514	global		dst_zero
10515dst_zero:
10516	tst.b		DST_EX(%a1)		# N flag <- sign bit of dst operand
10517	bpl.b		ld_pzero		# sign clear: return +0
10518	bra.b		ld_mzero		# sign set: return -0
10519
10520#########################################################################
10521# src_inf(): Return signed inf according to sign of src operand.	#
10522#########################################################################
10523	global		src_inf
10524src_inf:
10525	tst.b		SRC_EX(%a0) 		# get sign of src operand
10526	bmi.b		ld_minf			# if negative branch
10527
10528#
10529# ld_pinf(): return a positive infinity. (src_inf falls through to here.)
10530#
10531	global		ld_pinf
10532ld_pinf:
10533	fmov.s		&0x7f800000,%fp0	# load +INF
10534	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
10535	rts
10536
10537#
10538# ld_minf(): return a negative infinity.
10539#
10540	global		ld_minf
10541ld_minf:
10542	fmov.s		&0xff800000,%fp0	# load -INF
10543	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10544	rts
10545
10546#########################################################################
10547# dst_inf(): Hand back +INF or -INF, whichever matches the dst sign.	#
10548#########################################################################
10549	global		dst_inf
10550dst_inf:
10551	tst.b		DST_EX(%a1)		# examine the dst sign byte
10552	bpl.b		ld_pinf			# sign clear: return +INF
10553	bra.b		ld_minf			# sign set: return -INF
10554
10555	global		szr_inf
10556#################################################################
10557# szr_inf(): src is an infinity; the result depends on its sign:	#
10558#	     positive src -> +INF, negative src -> +ZERO.	#
10559#	     Shared by fetox, ftwotox, and ftentox.		#
10560#################################################################
10561szr_inf:
10562	tst.b		SRC_EX(%a0)		# examine the src sign byte
10563	bpl.b		ld_pinf			# sign clear: return +INF
10564	bra.b		ld_pzero		# sign set: return +0
10565
10566#########################################################################
10567# sopr_inf(): src is an infinity; a positive src yields +INF, a		#
10568#	      negative src is sent to the operand error routine.	#
10569#	      Shared by flogn, flognp1, flog10, and flog2.		#
10570#########################################################################
10571	global		sopr_inf
10572sopr_inf:
10573	tst.b		SRC_EX(%a0)		# examine the src sign byte
10574	bpl.b		ld_pinf			# sign clear: return +INF
10575	bra.w		t_operr			# sign set: operand error
10576
10577#################################################################
10578# setoxm1i(): src is an infinity; a positive src yields +INF,	#
10579#	      a negative src yields minus one.			#
10580#	      Used by fetoxm1.					#
10581#################################################################
10582	global		setoxm1i
10583setoxm1i:
10584	tst.b		SRC_EX(%a0)		# examine the src sign byte
10585	bpl.b		ld_pinf			# sign clear: return +INF
10586	bra.b		ld_mone			# sign set: return -1
10587
10588#########################################################################
10589# src_one(): Load a one into fp0 with the sign of the src operand.	#
10590#########################################################################
10591	global		src_one
10592src_one:
10593	tst.b		SRC_EX(%a0) 		# check sign of source
10594	bmi.b		ld_mone			# negative src: return -1
10595						# otherwise fall through into ld_pone
10596#
10597# ld_pone(): load +1 into fp0 and clear FPSR_CC(a6).
10598#
10599	global		ld_pone
10600ld_pone:
10601	fmov.s		&0x3f800000,%fp0	# load +1
10602	clr.b		FPSR_CC(%a6)		# no ccode bits set for +1
10603	rts
10604
10605#
10606# ld_mone(): load -1 into fp0 and overwrite FPSR_CC(a6) with 'N'.
10607#
10608	global		ld_mone
10609ld_mone:
10610	fmov.s		&0xbf800000,%fp0	# load -1
10611	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10612	rts
10613
10614ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2 (loaded with fmov.x below)
10615mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2 (same digits, sign bit set)
10616
10617#################################################################
10618# spi_2(): Return signed PI/2 according to sign of src operand.	#
10619#################################################################
10620	global		spi_2
10621spi_2:
10622	tst.b		SRC_EX(%a0) 		# check sign of source
10623	bmi.b		ld_mpi2			# negative src: return -pi/2
10624						# otherwise fall through into ld_ppi2
10625#
10626# ld_ppi2(): return positive PI/2. d0 is written to FPCR first, so d0
10627#	     must hold the FPCR value to use - TODO confirm with callers.
10628	global		ld_ppi2
10629ld_ppi2:
10630	fmov.l		%d0,%fpcr		# FPCR <- d0 before the constant is loaded
10631	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
10632	bra.w		t_pinx2			# set INEX2
10633
10634#
10635# ld_mpi2(): return negative PI/2. Same FPCR <- d0 step as ld_ppi2.
10636#
10637	global		ld_mpi2
10638ld_mpi2:
10639	fmov.l		%d0,%fpcr		# FPCR <- d0 before the constant is loaded
10640	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
10641	bra.w		t_minx2			# set INEX2
10642
10643####################################################
10644# The following routines give support for fsincos. #
10645####################################################
10646
10647#
10648# ssincosz(): When the src operand is ZERO, store a one in the
10649# 	      cosine register and return a ZERO in fp0 w/ the same sign
10650#	      as the src operand. Both arms leave through sto_cos.
10651#
10652	global		ssincosz
10653ssincosz:
10654	fmov.s		&0x3f800000,%fp1	# cosine result = +1, handed to sto_cos in fp1
10655	tst.b		SRC_EX(%a0)		# test sign
10656	bpl.b		sincoszp		# positive src: go load +0
10657	fmov.s		&0x80000000,%fp0	# return sin result in fp0
10658	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# 'N','Z' for the -0 result
10659	bra.b		sto_cos			# store cosine result
10660sincoszp:
10661	fmov.s		&0x00000000,%fp0	# return sin result in fp0
10662	mov.b		&z_bmask,FPSR_CC(%a6)	# 'Z' for the +0 result
10663	bra.b		sto_cos			# store cosine result
10664
10665#
10666# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10667#	      register and jump to the operand error routine. No sign
10668#	      test is made here: both +INF and -INF take this path.
10669#
10670	global		ssincosi
10671ssincosi:
10672	fmov.x		qnan(%pc),%fp1		# load NAN
10673	bsr.l		sto_cos			# store cosine result
10674	bra.w		t_operr			# sine result comes from t_operr
10675
10676#
10677# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10678# 		 register and branch to the src QNAN routine.
10679#
10680	global		ssincosqnan
10681ssincosqnan:
10682	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand, read via a0
10683	bsr.l		sto_cos			# write fp1 to the cosine register
10684	bra.w		src_qnan		# sine result: src QNAN in fp0
10685
10686#
10687# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10688#		 in the cosine register and branch to the src SNAN routine.
10689#
10690	global		ssincossnan
10691ssincossnan:
10692	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand, read via a0
10693	bsr.l		sto_cos			# write fp1 to the cosine register
10694	bra.w		src_snan		# sine result: src SNAN path
10695
10696########################################################################
10697
10698#########################################################################
10699# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
10700#	     fp1 holds the result of the cosine portion of ssincos().	#
10701#	     the value in fp1 will not take any exceptions when moved.	#
10702# INPUT:								#
10703#	fp1 : fp value to store						#
10704# MODIFIED:								#
10705#	d0								#
10706#########################################################################
10707	global		sto_cos
10708sto_cos:
10709	mov.b		1+EXC_CMDREG(%a6),%d0	# byte at offset 1 of the saved command word
10710	andi.w		&0x7,%d0		# keep low 3 bits: register number 0-7
10711	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0	# fetch 16-bit offset of the handler
10712	jmp		(tbl_sto_cos.b,%pc,%d0.w*1)	# jump to tbl_sto_cos + offset
10713
10714tbl_sto_cos:					# one word per register, relative to tbl_sto_cos
10715	short		sto_cos_0 - tbl_sto_cos
10716	short		sto_cos_1 - tbl_sto_cos
10717	short		sto_cos_2 - tbl_sto_cos
10718	short		sto_cos_3 - tbl_sto_cos
10719	short		sto_cos_4 - tbl_sto_cos
10720	short		sto_cos_5 - tbl_sto_cos
10721	short		sto_cos_6 - tbl_sto_cos
10722	short		sto_cos_7 - tbl_sto_cos
10723
10724sto_cos_0:					# fp0/fp1 targets are written to the a6 frame,
10725	fmovm.x		&0x40,EXC_FP0(%a6)	# not to the live registers (mask 0x40 = fp1)
10726	rts
10727sto_cos_1:
10728	fmovm.x		&0x40,EXC_FP1(%a6)	# fp1 -> saved-fp1 slot in the frame
10729	rts
10730sto_cos_2:					# fp2-fp7 targets are written directly
10731	fmov.x 		%fp1,%fp2
10732	rts
10733sto_cos_3:
10734	fmov.x		%fp1,%fp3
10735	rts
10736sto_cos_4:
10737	fmov.x		%fp1,%fp4
10738	rts
10739sto_cos_5:
10740	fmov.x		%fp1,%fp5
10741	rts
10742sto_cos_6:
10743	fmov.x		%fp1,%fp6
10744	rts
10745sto_cos_7:
10746	fmov.x		%fp1,%fp7
10747	rts
10748
10749##################################################################
10750	global		smod_sdnrm		# fmod, src tagged norm or denorm:
10751	global		smod_snorm		# pick the handler from the dst tag
10752smod_sdnrm:
10753smod_snorm:
10754	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10755	beq.l		smod			# tag 0: run smod
10756	cmpi.b		%d1,&DENORM		# denorm dst also runs smod
10757	beq.l		smod
10758	cmpi.b		%d1,&ZERO		# zero dst
10759	beq.w		smod_zro
10760	cmpi.b		%d1,&INF		# infinite dst: operand error
10761	beq.l		t_operr
10762	cmpi.b		%d1,&SNAN		# signalling NaN dst
10763	beq.l		dst_snan
10764	bra.l		dst_qnan		# any other tag: quiet NaN path
10765
10766	global		smod_szero		# fmod with a zero src: dispatch on dst tag
10767smod_szero:
10768	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10769	beq.l		t_operr			# tag 0: operand error
10770	cmpi.b		%d1,&DENORM		# denorm dst: operand error
10771	beq.l		t_operr
10772	cmpi.b		%d1,&ZERO		# zero dst: operand error
10773	beq.l		t_operr
10774	cmpi.b		%d1,&INF		# infinite dst: operand error
10775	beq.l		t_operr
10776	cmpi.b		%d1,&QNAN		# quiet NaN dst
10777	beq.l		dst_qnan
10778	bra.l		dst_snan		# any other tag: signalling NaN path
10779
10780	global		smod_sinf		# fmod with an infinite src: dispatch on dst tag
10781smod_sinf:
10782	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10783	beq.l		smod_fpn		# tag 0: result is the dst itself
10784	cmpi.b		%d1,&DENORM		# denorm dst takes the same path
10785	beq.l		smod_fpn
10786	cmpi.b		%d1,&ZERO		# zero dst
10787	beq.l		smod_zro
10788	cmpi.b		%d1,&INF		# infinite dst: operand error
10789	beq.l		t_operr
10790	cmpi.b		%d1,&QNAN		# quiet NaN dst
10791	beq.l		dst_qnan
10792	bra.l		dst_snan		# any other tag: signalling NaN path
10793
10794smod_zro:
10795srem_zro:
10796	mov.b		DST_EX(%a1),%d0		# d0 = dst sign byte
10797	mov.b		SRC_EX(%a0),%d1		# d1 = src sign byte
10798	eor.b		%d0,%d1			# d1 top bit = sign(src) xor sign(dst)
10799	andi.b		&0x80,%d1		# drop everything but that bit
10800	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte carries only the sign
10801	tst.b		%d0			# result zero takes the dst sign
10802	bmi.w		ld_mzero		# dst negative: -0
10803	bra.w		ld_pzero		# dst positive: +0
10804
10805smod_fpn:					# reached from smod_sinf/srem_sinf when the
10806srem_fpn:					# dst tag is 0 or DENORM; result is the dst
10807	clr.b		FPSR_QBYTE(%a6)		# NOTE(review): overwritten a few lines below
10808	mov.l		%d0,-(%sp)		# save d0; d0 is reused for the dst sign
10809	mov.b		SRC_EX(%a0),%d1		# get src sign
10810	mov.b		DST_EX(%a1),%d0		# get dst sign
10811	eor.b		%d0,%d1			# get qbyte sign
10812	andi.b		&0x80,%d1		# keep only the sign bit
10813	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign(src) xor sign(dst)
10814	cmpi.b		DTAG(%a6),&DENORM	# is the dst tagged DENORM?
10815	bne.b		smod_nrm		# no: return the dst as is
10816	lea		DST(%a1),%a0		# yes: a0 -> dst operand for t_resdnrm
10817	mov.l		(%sp)+,%d0		# restore the saved d0
10818	bra		t_resdnrm
10819smod_nrm:
10820	fmov.l		(%sp)+,%fpcr		# the saved d0 is popped straight into FPCR
10821	fmov.x		DST(%a1),%fp0		# result = dst operand
10822	tst.b		DST_EX(%a1)		# negative dst?
10823	bmi.b		smod_nrm_neg
10824	rts					# positive dst: FPSR_CC is not written here
10825
10826smod_nrm_neg:
10827	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
10828	rts
10829
10830#########################################################################
10831	global		srem_snorm		# frem, src tagged norm or denorm:
10832	global		srem_sdnrm		# pick the handler from the dst tag
10833srem_sdnrm:
10834srem_snorm:
10835	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10836	beq.l		srem			# tag 0: run srem
10837	cmpi.b		%d1,&DENORM		# denorm dst also runs srem
10838	beq.l		srem
10839	cmpi.b		%d1,&ZERO		# zero dst
10840	beq.w		srem_zro
10841	cmpi.b		%d1,&INF		# infinite dst: operand error
10842	beq.l		t_operr
10843	cmpi.b		%d1,&QNAN		# quiet NaN dst
10844	beq.l		dst_qnan
10845	bra.l		dst_snan		# any other tag: signalling NaN path
10846
10847	global		srem_szero		# frem with a zero src: dispatch on dst tag
10848srem_szero:
10849	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10850	beq.l		t_operr			# tag 0: operand error
10851	cmpi.b		%d1,&DENORM		# denorm dst: operand error
10852	beq.l		t_operr
10853	cmpi.b		%d1,&ZERO		# zero dst: operand error
10854	beq.l		t_operr
10855	cmpi.b		%d1,&INF		# infinite dst: operand error
10856	beq.l		t_operr
10857	cmpi.b		%d1,&QNAN		# quiet NaN dst
10858	beq.l		dst_qnan
10859	bra.l		dst_snan		# any other tag: signalling NaN path
10860
10861	global		srem_sinf		# frem with an infinite src: dispatch on dst tag
10862srem_sinf:
10863	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10864	beq.w		srem_fpn		# tag 0: result is the dst itself
10865	cmpi.b		%d1,&DENORM		# denorm dst takes the same path
10866	beq.l		srem_fpn
10867	cmpi.b		%d1,&ZERO		# zero dst
10868	beq.w		srem_zro
10869	cmpi.b		%d1,&INF		# infinite dst: operand error
10870	beq.l		t_operr
10871	cmpi.b		%d1,&QNAN		# quiet NaN dst
10872	beq.l		dst_qnan
10873	bra.l		dst_snan		# any other tag: signalling NaN path
10874
10875#########################################################################
10876	global		sscale_snorm		# fscale, src tagged norm or denorm:
10877	global		sscale_sdnrm		# pick the handler from the dst tag
10878sscale_snorm:
10879sscale_sdnrm:
10880	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10881	beq.l		sscale			# tag 0: run sscale
10882	cmpi.b		%d1,&DENORM		# denorm dst also runs sscale
10883	beq.l		sscale
10884	cmpi.b		%d1,&ZERO		# zero dst: signed zero back
10885	beq.l		dst_zero
10886	cmpi.b		%d1,&INF		# infinite dst: signed infinity back
10887	beq.l		dst_inf
10888	cmpi.b		%d1,&QNAN		# quiet NaN dst
10889	beq.l		dst_qnan
10890	bra.l		dst_snan		# any other tag: signalling NaN path
10891
10892	global		sscale_szero		# fscale with a zero src: dispatch on dst tag
10893sscale_szero:
10894	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10895	beq.l		sscale			# tag 0: run sscale
10896	cmpi.b		%d1,&DENORM		# denorm dst also runs sscale
10897	beq.l		sscale
10898	cmpi.b		%d1,&ZERO		# zero dst: signed zero back
10899	beq.l		dst_zero
10900	cmpi.b		%d1,&INF		# infinite dst: signed infinity back
10901	beq.l		dst_inf
10902	cmpi.b		%d1,&QNAN		# quiet NaN dst
10903	beq.l		dst_qnan
10904	bra.l		dst_snan		# any other tag: signalling NaN path
10905
10906	global		sscale_sinf		# fscale with an infinite src: dispatch on dst tag
10907sscale_sinf:
10908	mov.b		DTAG(%a6),%d1		# fetch dst tag (sets Z when it is 0)
10909	beq.l		t_operr			# tag 0: operand error
10910	cmpi.b		%d1,&SNAN		# signalling NaN dst
10911	beq.l		dst_snan
10912	cmpi.b		%d1,&QNAN		# quiet NaN dst
10913	beq.l		dst_qnan
10914	bra.l		t_operr			# every other dst tag: operand error
10915
10916########################################################################
10917
10918#
10919# sop_sqnan(): frem/fmod/fscale with a QNAN src. A NaN dst wins over the src.
10920#
10921	global		sop_sqnan
10922sop_sqnan:
10923	mov.b		DTAG(%a6),%d1		# fetch dst tag
10924	cmpi.b		%d1,&SNAN		# dst is a signalling NaN?
10925	beq.b		dst_snan
10926	cmpi.b		%d1,&QNAN		# dst is a quiet NaN?
10927	beq.b		dst_qnan
10928	bra.b		src_qnan		# dst is not a NaN: return the src QNAN
10929
10930#
10931# sop_ssnan(): frem/fmod/fscale with an SNAN src. A NaN dst wins over the src.
10932#
10933	global		sop_ssnan
10934sop_ssnan:
10935	mov.b		DTAG(%a6),%d1		# fetch dst tag
10936	cmpi.b		%d1,&SNAN		# dst is a signalling NaN?
10937	beq.b		dst_snan
10938	cmpi.b		%d1,&QNAN		# dst is a quiet NaN?
10939	beq.b		dst_qnan_src_snan
10940	bra.b		src_snan		# dst is not a NaN: return the src SNAN
10941
10942dst_qnan_src_snan:				# QNAN dst is returned, SNAN src is still flagged
10943	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10944	bra.b		dst_qnan
10945
10946#
10947# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10948#
10949	global		dst_snan
10950dst_snan:
10951	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
10952	fmov.l		%fpsr,%d0		# catch resulting status
10953	or.l		%d0,USER_FPSR(%a6)	# store status
10954	rts
10955
10956#
10957# dst_qnan(): Return the dst QNAN in fp0; FPSR_CC(a6) gets 'NAN' (+'N' if negative).
10958#
10959	global		dst_qnan
10960dst_qnan:
10961	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
10962	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
10963	bmi.b		dst_qnan_m
10964dst_qnan_p:
10965	mov.b		&nan_bmask,FPSR_CC(%a6)	# positive QNAN: 'NAN' only
10966	rts
10967dst_qnan_m:
10968	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# negative QNAN: 'N','NAN'
10969	rts
10970
10971#
10972# src_snan(): Return the src SNAN w/ the SNAN bit set; status goes to USER_FPSR.
10973#
10974	global		src_snan
10975src_snan:
10976	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
10977	fmov.l		%fpsr,%d0		# catch resulting status
10978	or.l		%d0,USER_FPSR(%a6)	# store status
10979	rts
10980
10981#
10982# src_qnan(): Return the src QNAN in fp0; FPSR_CC(a6) gets 'NAN' (+'N' if negative).
10983#
10984	global		src_qnan
10985src_qnan:
10986	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
10987	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
10988	bmi.b		src_qnan_m		# negative: take this routine's own exit
10989src_qnan_p:
10990	mov.b		&nan_bmask,FPSR_CC(%a6)	# positive QNAN: 'NAN' only
10991	rts
10992src_qnan_m:
10993	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# negative QNAN: 'N','NAN'
10994	rts
10995
10996#
10997# fkern2.s:
10998#	These entry points are used by the exception handler
10999# routines where an instruction is selected by an index into
11000# a large jump table corresponding to a given instruction which
11001# has been decoded. Flow continues here where we now decode
11002# further according to the source operand type.
11003#
11004
11005	global		fsinh			# fsinh: select handler from src tag
11006fsinh:
11007	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11008	beq.l		ssinh			# tag 0
11009	cmpi.b		%d1,&DENORM		# denorm src
11010	beq.l		ssinhd
11011	cmpi.b		%d1,&ZERO		# zero src
11012	beq.l		src_zero
11013	cmpi.b		%d1,&INF		# infinite src
11014	beq.l		src_inf
11015	cmpi.b		%d1,&QNAN		# quiet NaN src
11016	beq.l		src_qnan
11017	bra.l		src_snan		# any other tag
11018
11019	global		flognp1			# flognp1: select handler from src tag
11020flognp1:
11021	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11022	beq.l		slognp1			# tag 0
11023	cmpi.b		%d1,&DENORM		# denorm src
11024	beq.l		slognp1d
11025	cmpi.b		%d1,&ZERO		# zero src
11026	beq.l		src_zero
11027	cmpi.b		%d1,&INF		# infinite src
11028	beq.l		sopr_inf
11029	cmpi.b		%d1,&QNAN		# quiet NaN src
11030	beq.l		src_qnan
11031	bra.l		src_snan		# any other tag
11032
11033	global		fetoxm1			# fetoxm1: select handler from src tag
11034fetoxm1:
11035	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11036	beq.l		setoxm1			# tag 0
11037	cmpi.b		%d1,&DENORM		# denorm src
11038	beq.l		setoxm1d
11039	cmpi.b		%d1,&ZERO		# zero src
11040	beq.l		src_zero
11041	cmpi.b		%d1,&INF		# infinite src
11042	beq.l		setoxm1i
11043	cmpi.b		%d1,&QNAN		# quiet NaN src
11044	beq.l		src_qnan
11045	bra.l		src_snan		# any other tag
11046
11047	global		ftanh			# ftanh: select handler from src tag
11048ftanh:
11049	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11050	beq.l		stanh			# tag 0
11051	cmpi.b		%d1,&DENORM		# denorm src
11052	beq.l		stanhd
11053	cmpi.b		%d1,&ZERO		# zero src
11054	beq.l		src_zero
11055	cmpi.b		%d1,&INF		# infinite src
11056	beq.l		src_one
11057	cmpi.b		%d1,&QNAN		# quiet NaN src
11058	beq.l		src_qnan
11059	bra.l		src_snan		# any other tag
11060
11061	global		fatan			# fatan: select handler from src tag
11062fatan:
11063	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11064	beq.l		satan			# tag 0
11065	cmpi.b		%d1,&DENORM		# denorm src
11066	beq.l		satand
11067	cmpi.b		%d1,&ZERO		# zero src
11068	beq.l		src_zero
11069	cmpi.b		%d1,&INF		# infinite src
11070	beq.l		spi_2
11071	cmpi.b		%d1,&QNAN		# quiet NaN src
11072	beq.l		src_qnan
11073	bra.l		src_snan		# any other tag
11074
11075	global		fasin			# fasin: select handler from src tag
11076fasin:
11077	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11078	beq.l		sasin			# tag 0
11079	cmpi.b		%d1,&DENORM		# denorm src
11080	beq.l		sasind
11081	cmpi.b		%d1,&ZERO		# zero src
11082	beq.l		src_zero
11083	cmpi.b		%d1,&INF		# infinite src: operand error
11084	beq.l		t_operr
11085	cmpi.b		%d1,&QNAN		# quiet NaN src
11086	beq.l		src_qnan
11087	bra.l		src_snan		# any other tag
11088
11089	global		fatanh			# fatanh: select handler from src tag
11090fatanh:
11091	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11092	beq.l		satanh			# tag 0
11093	cmpi.b		%d1,&DENORM		# denorm src
11094	beq.l		satanhd
11095	cmpi.b		%d1,&ZERO		# zero src
11096	beq.l		src_zero
11097	cmpi.b		%d1,&INF		# infinite src: operand error
11098	beq.l		t_operr
11099	cmpi.b		%d1,&QNAN		# quiet NaN src
11100	beq.l		src_qnan
11101	bra.l		src_snan		# any other tag
11102
11103	global		fsine			# fsine: select handler from src tag
11104fsine:
11105	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11106	beq.l		ssin			# tag 0
11107	cmpi.b		%d1,&DENORM		# denorm src
11108	beq.l		ssind
11109	cmpi.b		%d1,&ZERO		# zero src
11110	beq.l		src_zero
11111	cmpi.b		%d1,&INF		# infinite src: operand error
11112	beq.l		t_operr
11113	cmpi.b		%d1,&QNAN		# quiet NaN src
11114	beq.l		src_qnan
11115	bra.l		src_snan		# any other tag
11116
11117	global		ftan			# ftan: select handler from src tag
11118ftan:
11119	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11120	beq.l		stan			# tag 0
11121	cmpi.b		%d1,&DENORM		# denorm src
11122	beq.l		stand
11123	cmpi.b		%d1,&ZERO		# zero src
11124	beq.l		src_zero
11125	cmpi.b		%d1,&INF		# infinite src: operand error
11126	beq.l		t_operr
11127	cmpi.b		%d1,&QNAN		# quiet NaN src
11128	beq.l		src_qnan
11129	bra.l		src_snan		# any other tag
11130
11131	global		fetox			# fetox: select handler from src tag
11132fetox:
11133	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11134	beq.l		setox			# tag 0
11135	cmpi.b		%d1,&DENORM		# denorm src
11136	beq.l		setoxd
11137	cmpi.b		%d1,&ZERO		# zero src: result is +1
11138	beq.l		ld_pone
11139	cmpi.b		%d1,&INF		# infinite src
11140	beq.l		szr_inf
11141	cmpi.b		%d1,&QNAN		# quiet NaN src
11142	beq.l		src_qnan
11143	bra.l		src_snan		# any other tag
11144
11145	global		ftwotox			# ftwotox: select handler from src tag
11146ftwotox:
11147	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11148	beq.l		stwotox			# tag 0
11149	cmpi.b		%d1,&DENORM		# denorm src
11150	beq.l		stwotoxd
11151	cmpi.b		%d1,&ZERO		# zero src: result is +1
11152	beq.l		ld_pone
11153	cmpi.b		%d1,&INF		# infinite src
11154	beq.l		szr_inf
11155	cmpi.b		%d1,&QNAN		# quiet NaN src
11156	beq.l		src_qnan
11157	bra.l		src_snan		# any other tag
11158
11159	global		ftentox			# ftentox: select handler from src tag
11160ftentox:
11161	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11162	beq.l		stentox			# tag 0
11163	cmpi.b		%d1,&DENORM		# denorm src
11164	beq.l		stentoxd
11165	cmpi.b		%d1,&ZERO		# zero src: result is +1
11166	beq.l		ld_pone
11167	cmpi.b		%d1,&INF		# infinite src
11168	beq.l		szr_inf
11169	cmpi.b		%d1,&QNAN		# quiet NaN src
11170	beq.l		src_qnan
11171	bra.l		src_snan		# any other tag
11172
11173	global		flogn			# flogn: select handler from src tag
11174flogn:
11175	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11176	beq.l		slogn			# tag 0
11177	cmpi.b		%d1,&DENORM		# denorm src
11178	beq.l		slognd
11179	cmpi.b		%d1,&ZERO		# zero src: handled by t_dz2
11180	beq.l		t_dz2
11181	cmpi.b		%d1,&INF		# infinite src
11182	beq.l		sopr_inf
11183	cmpi.b		%d1,&QNAN		# quiet NaN src
11184	beq.l		src_qnan
11185	bra.l		src_snan		# any other tag
11186
11187	global		flog10			# flog10: select handler from src tag
11188flog10:
11189	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11190	beq.l		slog10			# tag 0
11191	cmpi.b		%d1,&DENORM		# denorm src
11192	beq.l		slog10d
11193	cmpi.b		%d1,&ZERO		# zero src: handled by t_dz2
11194	beq.l		t_dz2
11195	cmpi.b		%d1,&INF		# infinite src
11196	beq.l		sopr_inf
11197	cmpi.b		%d1,&QNAN		# quiet NaN src
11198	beq.l		src_qnan
11199	bra.l		src_snan		# any other tag
11200
11201	global		flog2			# flog2: select handler from src tag
11202flog2:
11203	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11204	beq.l		slog2			# tag 0
11205	cmpi.b		%d1,&DENORM		# denorm src
11206	beq.l		slog2d
11207	cmpi.b		%d1,&ZERO		# zero src: handled by t_dz2
11208	beq.l		t_dz2
11209	cmpi.b		%d1,&INF		# infinite src
11210	beq.l		sopr_inf
11211	cmpi.b		%d1,&QNAN		# quiet NaN src
11212	beq.l		src_qnan
11213	bra.l		src_snan		# any other tag
11214
11215	global		fcosh			# fcosh: select handler from src tag
11216fcosh:
11217	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11218	beq.l		scosh			# tag 0
11219	cmpi.b		%d1,&DENORM		# denorm src
11220	beq.l		scoshd
11221	cmpi.b		%d1,&ZERO		# zero src: result is +1
11222	beq.l		ld_pone
11223	cmpi.b		%d1,&INF		# infinite src: result is +INF
11224	beq.l		ld_pinf
11225	cmpi.b		%d1,&QNAN		# quiet NaN src
11226	beq.l		src_qnan
11227	bra.l		src_snan		# any other tag
11228
11229	global		facos			# facos: select handler from src tag
11230facos:
11231	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11232	beq.l		sacos			# tag 0
11233	cmpi.b		%d1,&DENORM		# denorm src
11234	beq.l		sacosd
11235	cmpi.b		%d1,&ZERO		# zero src: result is +pi/2
11236	beq.l		ld_ppi2
11237	cmpi.b		%d1,&INF		# infinite src: operand error
11238	beq.l		t_operr
11239	cmpi.b		%d1,&QNAN		# quiet NaN src
11240	beq.l		src_qnan
11241	bra.l		src_snan		# any other tag
11242
11243	global		fcos			# fcos: select handler from src tag
11244fcos:
11245	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11246	beq.l		scos			# tag 0
11247	cmpi.b		%d1,&DENORM		# denorm src
11248	beq.l		scosd
11249	cmpi.b		%d1,&ZERO		# zero src: result is +1
11250	beq.l		ld_pone
11251	cmpi.b		%d1,&INF		# infinite src: operand error
11252	beq.l		t_operr
11253	cmpi.b		%d1,&QNAN		# quiet NaN src
11254	beq.l		src_qnan
11255	bra.l		src_snan		# any other tag
11256
11257	global		fgetexp			# fgetexp: select handler from src tag
11258fgetexp:
11259	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11260	beq.l		sgetexp			# tag 0
11261	cmpi.b		%d1,&DENORM		# denorm src
11262	beq.l		sgetexpd
11263	cmpi.b		%d1,&ZERO		# zero src
11264	beq.l		src_zero
11265	cmpi.b		%d1,&INF		# infinite src: operand error
11266	beq.l		t_operr
11267	cmpi.b		%d1,&QNAN		# quiet NaN src
11268	beq.l		src_qnan
11269	bra.l		src_snan		# any other tag
11270
11271	global		fgetman			# fgetman: select handler from src tag
11272fgetman:
11273	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11274	beq.l		sgetman			# tag 0
11275	cmpi.b		%d1,&DENORM		# denorm src
11276	beq.l		sgetmand
11277	cmpi.b		%d1,&ZERO		# zero src
11278	beq.l		src_zero
11279	cmpi.b		%d1,&INF		# infinite src: operand error
11280	beq.l		t_operr
11281	cmpi.b		%d1,&QNAN		# quiet NaN src
11282	beq.l		src_qnan
11283	bra.l		src_snan		# any other tag
11284
11285	global		fsincos			# fsincos: select handler from src tag
11286fsincos:
11287	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11288	beq.l		ssincos			# tag 0
11289	cmpi.b		%d1,&DENORM		# denorm src
11290	beq.l		ssincosd
11291	cmpi.b		%d1,&ZERO		# zero src
11292	beq.l		ssincosz
11293	cmpi.b		%d1,&INF		# infinite src
11294	beq.l		ssincosi
11295	cmpi.b		%d1,&QNAN		# quiet NaN src
11296	beq.l		ssincosqnan
11297	bra.l		ssincossnan		# any other tag
11298
11299	global		fmod			# fmod: first-level dispatch on src tag
11300fmod:
11301	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11302	beq.l		smod_snorm		# tag 0
11303	cmpi.b		%d1,&DENORM		# denorm src
11304	beq.l		smod_sdnrm
11305	cmpi.b		%d1,&ZERO		# zero src
11306	beq.l		smod_szero
11307	cmpi.b		%d1,&INF		# infinite src
11308	beq.l		smod_sinf
11309	cmpi.b		%d1,&QNAN		# quiet NaN src
11310	beq.l		sop_sqnan
11311	bra.l		sop_ssnan		# any other tag
11312
11313	global		frem			# frem: first-level dispatch on src tag
11314frem:
11315	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11316	beq.l		srem_snorm		# tag 0
11317	cmpi.b		%d1,&DENORM		# denorm src
11318	beq.l		srem_sdnrm
11319	cmpi.b		%d1,&ZERO		# zero src
11320	beq.l		srem_szero
11321	cmpi.b		%d1,&INF		# infinite src
11322	beq.l		srem_sinf
11323	cmpi.b		%d1,&QNAN		# quiet NaN src
11324	beq.l		sop_sqnan
11325	bra.l		sop_ssnan		# any other tag
11326
11327	global		fscale			# fscale: first-level dispatch on src tag
11328fscale:
11329	mov.b		STAG(%a6),%d1		# fetch src tag (sets Z when it is 0)
11330	beq.l		sscale_snorm		# tag 0
11331	cmpi.b		%d1,&DENORM		# denorm src
11332	beq.l		sscale_sdnrm
11333	cmpi.b		%d1,&ZERO		# zero src
11334	beq.l		sscale_szero
11335	cmpi.b		%d1,&INF		# infinite src
11336	beq.l		sscale_sinf
11337	cmpi.b		%d1,&QNAN		# quiet NaN src
11338	beq.l		sop_sqnan
11339	bra.l		sop_ssnan		# any other tag
11340
11341#########################################################################
11342# XDEF ****************************************************************	#
11343# 	fgen_except(): catch an exception during transcendental 	#
11344#		       emulation					#
11345#									#
11346# XREF ****************************************************************	#
11347#	fmul() - emulate a multiply instruction				#
11348#	fadd() - emulate an add instruction				#
11349#	fin() - emulate an fmove instruction				#
11350#									#
11351# INPUT ***************************************************************	#
11352#	fp0 = destination operand					#
11353#	d1  = type of instruction that took exception (tested below)	#
11354#	fsave frame = source operand					#
11355# 									#
11356# OUTPUT **************************************************************	#
11357#	fp0 = result							#
11358#	fp1 = EXOP							#
11359#									#
11360# ALGORITHM ***********************************************************	#
11361# 	An exception occurred on the last instruction of the 		#
11362# transcendental emulation. hopefully, this won't be happening much 	#
11363# because it will be VERY slow.						#
11364# 	The only exceptions capable of passing through here are		#
11365# Overflow, Underflow, and Unsupported Data Type.			#
11366#									#
11367#########################################################################
11368
11369	global		fgen_except
11370fgen_except:
11371	cmpi.b		0x3(%sp),&0x7		# is exception UNSUPP?
11372	beq.b		fge_unsupp		# yes
11373# NOTE(review): byte 0x3(%sp) is tested, yet the src op is taken from 0x4(%sp) below - confirm the stack layout with callers.
11374	mov.b		&NORM,STAG(%a6)		# not UNSUPP: tag the src as NORM
11375
11376fge_cont:
11377	mov.b		&NORM,DTAG(%a6)		# the dst (fp0) is always tagged NORM
11378
11379# ok, I have a problem with putting the dst op at FP_DST. the emulation
11380# routines aren't supposed to alter the operands but we've just squashed
11381# FP_DST here...
11382
11383# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11384# than a potential bug. to begin with, only the dyadic functions
11385# frem,fmod, and fscale would get the dst trashed here. But, for
11386# the 060SP, the FP_DST is never used again anyways.
11387	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0
11388
11389	lea		0x4(%sp),%a0		# pass: ptr to src op
11390	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
11391
11392	cmpi.b		%d1,&FMOV_OP		# d1 selects the routine to re-run
11393	beq.b		fge_fin			# it was an "fmov"
11394	cmpi.b		%d1,&FADD_OP
11395	beq.b		fge_fadd		# it was an "fadd"
11396fge_fmul:					# neither fmov nor fadd: emulate as fmul
11397	bsr.l		fmul
11398	rts
11399fge_fadd:
11400	bsr.l		fadd
11401	rts
11402fge_fin:
11403	bsr.l		fin
11404	rts
11405
11406fge_unsupp:
11407	mov.b		&DENORM,STAG(%a6)	# UNSUPP: tag the src as DENORM instead
11408	bra.b		fge_cont
11409
11410#
11411# This table holds the offsets of the emulation routines for each individual
11412# math operation relative to the address of this table. Included are
11413# routines like fadd/fmul/fabs as well as the transcendentals.
11414# The location within the table is determined by the extension bits of the
11415# operation longword. Entries written "tbl_unsupp - tbl_unsupp" hold offset 0.
11416#
11417
11418	swbeg		&109
11419tbl_unsupp:
11420	long		fin	 	- tbl_unsupp	# 00: fmove
11421	long		fint	 	- tbl_unsupp	# 01: fint
11422	long		fsinh	 	- tbl_unsupp	# 02: fsinh
11423	long		fintrz	 	- tbl_unsupp	# 03: fintrz
11424	long		fsqrt	 	- tbl_unsupp	# 04: fsqrt
11425	long		tbl_unsupp	- tbl_unsupp	# 05: zero offset, no routine
11426	long		flognp1		- tbl_unsupp	# 06: flognp1
11427	long		tbl_unsupp	- tbl_unsupp	# 07: zero offset, no routine
11428	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
11429	long		ftanh		- tbl_unsupp	# 09: ftanh
11430	long		fatan		- tbl_unsupp	# 0a: fatan
11431	long		tbl_unsupp	- tbl_unsupp	# 0b: zero offset, no routine
11432	long		fasin		- tbl_unsupp	# 0c: fasin
11433	long		fatanh		- tbl_unsupp	# 0d: fatanh
11434	long		fsine		- tbl_unsupp	# 0e: fsin
11435	long		ftan		- tbl_unsupp	# 0f: ftan
11436	long		fetox		- tbl_unsupp	# 10: fetox
11437	long		ftwotox		- tbl_unsupp	# 11: ftwotox
11438	long		ftentox		- tbl_unsupp	# 12: ftentox
11439	long		tbl_unsupp	- tbl_unsupp	# 13: zero offset, no routine
11440	long		flogn		- tbl_unsupp	# 14: flogn
11441	long		flog10		- tbl_unsupp	# 15: flog10
11442	long		flog2		- tbl_unsupp	# 16: flog2
11443	long		tbl_unsupp	- tbl_unsupp	# 17: zero offset, no routine
11444	long		fabs		- tbl_unsupp 	# 18: fabs
11445	long		fcosh		- tbl_unsupp	# 19: fcosh
11446	long		fneg		- tbl_unsupp 	# 1a: fneg
11447	long		tbl_unsupp	- tbl_unsupp	# 1b: zero offset, no routine
11448	long		facos		- tbl_unsupp	# 1c: facos
11449	long		fcos		- tbl_unsupp	# 1d: fcos
11450	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
11451	long		fgetman		- tbl_unsupp	# 1f: fgetman
11452	long		fdiv		- tbl_unsupp 	# 20: fdiv
11453	long		fmod		- tbl_unsupp	# 21: fmod
11454	long		fadd		- tbl_unsupp 	# 22: fadd
11455	long		fmul		- tbl_unsupp 	# 23: fmul
11456	long		fsgldiv		- tbl_unsupp 	# 24: fsgldiv
11457	long		frem		- tbl_unsupp	# 25: frem
11458	long		fscale		- tbl_unsupp	# 26: fscale
11459	long		fsglmul		- tbl_unsupp 	# 27: fsglmul
11460	long		fsub		- tbl_unsupp 	# 28: fsub
11461	long		tbl_unsupp	- tbl_unsupp	# 29: zero offset, no routine
11462	long		tbl_unsupp	- tbl_unsupp	# 2a: zero offset, no routine
11463	long		tbl_unsupp	- tbl_unsupp	# 2b: zero offset, no routine
11464	long		tbl_unsupp	- tbl_unsupp	# 2c: zero offset, no routine
11465	long		tbl_unsupp	- tbl_unsupp	# 2d: zero offset, no routine
11466	long		tbl_unsupp	- tbl_unsupp	# 2e: zero offset, no routine
11467	long		tbl_unsupp	- tbl_unsupp	# 2f: zero offset, no routine
11468	long		fsincos		- tbl_unsupp	# 30: fsincos
11469	long		fsincos		- tbl_unsupp	# 31: fsincos
11470	long		fsincos		- tbl_unsupp	# 32: fsincos
11471	long		fsincos		- tbl_unsupp	# 33: fsincos
11472	long		fsincos		- tbl_unsupp	# 34: fsincos
11473	long		fsincos		- tbl_unsupp	# 35: fsincos
11474	long		fsincos		- tbl_unsupp	# 36: fsincos
11475	long		fsincos		- tbl_unsupp	# 37: fsincos
11476	long		fcmp		- tbl_unsupp 	# 38: fcmp
11477	long		tbl_unsupp	- tbl_unsupp	# 39: zero offset, no routine
11478	long		ftst		- tbl_unsupp 	# 3a: ftst
11479	long		tbl_unsupp	- tbl_unsupp	# 3b: zero offset, no routine
11480	long		tbl_unsupp	- tbl_unsupp	# 3c: zero offset, no routine
11481	long		tbl_unsupp	- tbl_unsupp	# 3d: zero offset, no routine
11482	long		tbl_unsupp	- tbl_unsupp	# 3e: zero offset, no routine
11483	long		tbl_unsupp	- tbl_unsupp	# 3f: zero offset, no routine
11484	long		fsin		- tbl_unsupp 	# 40: fsmove
11485	long		fssqrt		- tbl_unsupp 	# 41: fssqrt
11486	long		tbl_unsupp	- tbl_unsupp	# 42: zero offset, no routine
11487	long		tbl_unsupp	- tbl_unsupp	# 43: zero offset, no routine
11488	long		fdin		- tbl_unsupp	# 44: fdmove
11489	long		fdsqrt		- tbl_unsupp 	# 45: fdsqrt
11490	long		tbl_unsupp	- tbl_unsupp	# 46: zero offset, no routine
11491	long		tbl_unsupp	- tbl_unsupp	# 47: zero offset, no routine
11492	long		tbl_unsupp	- tbl_unsupp	# 48: zero offset, no routine
11493	long		tbl_unsupp	- tbl_unsupp	# 49: zero offset, no routine
11494	long		tbl_unsupp	- tbl_unsupp	# 4a: zero offset, no routine
11495	long		tbl_unsupp	- tbl_unsupp	# 4b: zero offset, no routine
11496	long		tbl_unsupp	- tbl_unsupp	# 4c: zero offset, no routine
11497	long		tbl_unsupp	- tbl_unsupp	# 4d: zero offset, no routine
11498	long		tbl_unsupp	- tbl_unsupp	# 4e: zero offset, no routine
11499	long		tbl_unsupp	- tbl_unsupp	# 4f: zero offset, no routine
11500	long		tbl_unsupp	- tbl_unsupp	# 50: zero offset, no routine
11501	long		tbl_unsupp	- tbl_unsupp	# 51: zero offset, no routine
11502	long		tbl_unsupp	- tbl_unsupp	# 52: zero offset, no routine
11503	long		tbl_unsupp	- tbl_unsupp	# 53: zero offset, no routine
11504	long		tbl_unsupp	- tbl_unsupp	# 54: zero offset, no routine
11505	long		tbl_unsupp	- tbl_unsupp	# 55: zero offset, no routine
11506	long		tbl_unsupp	- tbl_unsupp	# 56: zero offset, no routine
11507	long		tbl_unsupp	- tbl_unsupp	# 57: zero offset, no routine
11508	long		fsabs		- tbl_unsupp 	# 58: fsabs
11509	long		tbl_unsupp	- tbl_unsupp	# 59: zero offset, no routine
11510	long		fsneg		- tbl_unsupp 	# 5a: fsneg
11511	long		tbl_unsupp	- tbl_unsupp	# 5b: zero offset, no routine
11512	long		fdabs		- tbl_unsupp	# 5c: fdabs
11513	long		tbl_unsupp	- tbl_unsupp	# 5d: zero offset, no routine
11514	long		fdneg		- tbl_unsupp 	# 5e: fdneg
11515	long		tbl_unsupp	- tbl_unsupp	# 5f: zero offset, no routine
11516	long		fsdiv		- tbl_unsupp	# 60: fsdiv
11517	long		tbl_unsupp	- tbl_unsupp	# 61: zero offset, no routine
11518	long		fsadd		- tbl_unsupp	# 62: fsadd
11519	long		fsmul		- tbl_unsupp	# 63: fsmul
11520	long		fddiv		- tbl_unsupp 	# 64: fddiv
11521	long		tbl_unsupp	- tbl_unsupp	# 65: zero offset, no routine
11522	long		fdadd		- tbl_unsupp	# 66: fdadd
11523	long		fdmul		- tbl_unsupp 	# 67: fdmul
11524	long		fssub		- tbl_unsupp	# 68: fssub
11525	long		tbl_unsupp	- tbl_unsupp	# 69: zero offset, no routine
11526	long		tbl_unsupp	- tbl_unsupp	# 6a: zero offset, no routine
11527	long		tbl_unsupp	- tbl_unsupp	# 6b: zero offset, no routine
11528	long		fdsub		- tbl_unsupp 	# 6c: fdsub
11529
11530#########################################################################
11531# XDEF ****************************************************************	#
11532# 	fmul(): emulates the fmul instruction				#
11533#	fsmul(): emulates the fsmul instruction				#
11534#	fdmul(): emulates the fdmul instruction				#
11535#									#
11536# XREF ****************************************************************	#
11537#	scale_to_zero_src() - scale src exponent to zero		#
11538#	scale_to_zero_dst() - scale dst exponent to zero		#
11539#	unf_res() - return default underflow result			#
11540#	ovf_res() - return default overflow result			#
11541# 	res_qnan() - return QNAN result					#
11542# 	res_snan() - return SNAN result					#
11543#									#
11544# INPUT ***************************************************************	#
11545#	a0 = pointer to extended precision source operand		#
11546#	a1 = pointer to extended precision destination operand		#
11547#	d0  rnd prec,mode						#
11548#									#
11549# OUTPUT **************************************************************	#
11550#	fp0 = result							#
11551#	fp1 = EXOP (if exception occurred)				#
11552#									#
11553# ALGORITHM ***********************************************************	#
11554#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11555# norms/denorms into ext/sgl/dbl precision.				#
11556#	For norms/denorms, scale the exponents such that a multiply	#
11557# instruction won't cause an exception. Use the regular fmul to		#
11558# compute a result. Check if the regular operands would have taken	#
11559# an exception. If so, return the default overflow/underflow result	#
11560# and return the EXOP if exceptions are enabled. Else, scale the 	#
11561# result operand to the proper exponent.				#
11562#									#
11563#########################################################################
11564# The two tables below are indexed by rounding precision (ext, sgl, dbl) and compared with the summed scale factor in fmul.
11565	align 		0x10
11566tbl_fmul_ovfl:
11567	long		0x3fff - 0x7ffe		# ext_max
11568	long		0x3fff - 0x407e		# sgl_max
11569	long		0x3fff - 0x43fe		# dbl_max
11570tbl_fmul_unfl:
11571	long		0x3fff + 0x0001		# ext_unfl
11572	long		0x3fff - 0x3f80		# sgl_unfl
11573	long		0x3fff - 0x3c00		# dbl_unfl
11574
11575	global		fsmul			# fsmul: fmul with the precision forced to single
11576fsmul:
11577	andi.b		&0x30,%d0		# clear rnd prec
11578	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11579	bra.b		fmul			# continue in the common multiply code
11580
11581	global		fdmul			# fdmul: fmul with the precision forced to double
11582fdmul:
11583	andi.b		&0x30,%d0		# keep only bits 5-4 of d0's low byte
11584	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11585
11586	global		fmul			# fmul: common entry; fdmul falls in from above
11587fmul:
11588	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11589
11590	clr.w		%d1
11591	mov.b		DTAG(%a6),%d1		# d1 = dst tag
11592	lsl.b		&0x3,%d1		# move it up three bits
11593	or.b		STAG(%a6),%d1		# combine src tags
11594	bne.w		fmul_not_norm		# optimize on non-norm input
11595						# both tags 0: fall through into fmul_norm
11596fmul_norm:
11597	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand into FP_SCR1
11598	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11599	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11600
11601	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
11602	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11603	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11604
11605	bsr.l		scale_to_zero_src	# scale src exponent
11606	mov.l		%d0,-(%sp)		# save scale factor 1
11607
11608	bsr.l		scale_to_zero_dst	# scale dst exponent
11609
11610	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
11611
11612	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
11613	lsr.b		&0x6,%d1		# shift to lo bits
11614	mov.l		(%sp)+,%d0		# load S.F.
11615	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11616	beq.w		fmul_may_ovfl		# result may rnd to overflow
11617	blt.w		fmul_ovfl		# result will overflow
11618
11619	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11620	beq.w		fmul_may_unfl		# result may rnd to no unfl
11621	bgt.w		fmul_unfl		# result will underflow
11622
11623#
11624# NORMAL:
11625# - the result of the multiply operation will neither overflow nor underflow.
11626# - do the multiply to the proper precision and rounding mode.
11627# - scale the result exponent using the scale factor. if both operands were
11628# normalized then we really don't need to go through this scaling. but for now,
11629# this will do.
11630#
11631fmul_normal:
11632	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11633
11634	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11635	fmov.l		&0x0,%fpsr		# clear FPSR
11636
11637	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11638
11639	fmov.l		%fpsr,%d1		# save status
11640	fmov.l		&0x0,%fpcr		# clear FPCR
11641
11642	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11643
11644fmul_normal_exit:				# d0 still holds the scale factor here
11645	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11646	mov.l		%d2,-(%sp)		# save d2
11647	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
11648	mov.l		%d1,%d2			# make a copy
11649	andi.l		&0x7fff,%d1		# strip sign
11650	andi.w		&0x8000,%d2		# keep old sign
11651	sub.l		%d0,%d1			# add scale factor
11652	or.w		%d2,%d1			# concat old sign,new exp
11653	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11654	mov.l		(%sp)+,%d2		# restore d2
11655	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11656	rts
11657
11658#
11659# OVERFLOW:
11660# - the result of the multiply operation is an overflow.
11661# - do the multiply to the proper precision and rounding mode in order to
11662# set the inexact bits.
11663# - calculate the default result and return it in fp0.
11664# - if overflow or inexact is enabled, we need a multiply result rounded to
11665# extended precision. if the original operation was extended, then we have this
11666# result. if the original operation was single or double, we have to do another
11667# multiply using extended precision and the correct rounding mode. the result
11668# of this operation then has its exponent scaled by -0x6000 to create the
11669# exceptional operand.
11670#
11671fmul_ovfl:
11672	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11673
11674	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11675	fmov.l		&0x0,%fpsr		# clear FPSR
11676
11677	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11678
11679	fmov.l		%fpsr,%d1		# save status
11680	fmov.l		&0x0,%fpcr		# clear FPCR
11681
11682	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11683
11684# save setting this until now because this is where fmul_may_ovfl may jump in
11685fmul_ovfl_tst:
11686	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11687
11688	mov.b		FPCR_ENABLE(%a6),%d1
11689	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11690	bne.b		fmul_ovfl_ena		# yes
11691
11692# calculate the default result
# ovf_res is called with d1 = sign flag (0xff when 'N' is set) and
# d0 = rnd prec,mode; its d0 result is or'ed into the ccodes and the
# default result is loaded from the address it leaves in a0.
11693fmul_ovfl_dis:
11694	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11695	sne		%d1			# set sign param accordingly
11696	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
11697	bsr.l		ovf_res			# calculate default result
11698	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11699	fmovm.x		(%a0),&0x80		# return default result in fp0
11700	rts
11701
11702#
11703# OVFL or INEX is enabled; Create EXOP:
11704# - if precision is extended, then we have the EXOP. simply bias the exponent
11705# with an extra -0x6000. if the precision is single or double, we need to
11706# calculate a result rounded to extended precision.
11707#
# d0 still holds the scale factor here; d1 is used as scratch.
11708fmul_ovfl_ena:
11709	mov.l		L_SCR3(%a6),%d1
11710	andi.b		&0xc0,%d1		# test the rnd prec
11711	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
11712
# build the EXOP from fp0: exponent = (exp - S.F. - 0x6000) & 0x7fff with
# the original sign, returned in fp1; then produce the default result.
11713fmul_ovfl_ena_cont:
11714	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
11715
11716	mov.l		%d2,-(%sp)		# save d2
11717	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11718	mov.w		%d1,%d2			# make a copy
11719	andi.l		&0x7fff,%d1		# strip sign
11720	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
11721	subi.l		&0x6000,%d1		# subtract bias
11722	andi.w		&0x7fff,%d1		# clear sign bit
11723	andi.w		&0x8000,%d2		# keep old sign
11724	or.w		%d2,%d1			# concat old sign,new exp
11725	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11726	mov.l		(%sp)+,%d2		# restore d2
11727	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11728	bra.b		fmul_ovfl_dis
11729
# sgl/dbl case: redo the multiply with the precision bits cleared (only the
# rounding-mode bits of L_SCR3 are loaded into the FPCR). The FPSR is not
# read back here, so the status saved earlier is left as it was.
11730fmul_ovfl_ena_sd:
11731	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11732
11733	mov.l		L_SCR3(%a6),%d1
11734	andi.b		&0x30,%d1		# keep rnd mode only
11735	fmov.l		%d1,%fpcr		# set FPCR
11736
11737	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11738
11739	fmov.l		&0x0,%fpcr		# clear FPCR
11740	bra.b		fmul_ovfl_ena_cont
11741
11742#
11743# may OVERFLOW:
11744# - the result of the multiply operation MAY overflow.
11745# - do the multiply to the proper precision and rounding mode in order to
11746# set the inexact bits.
11747# - if the scaled |result| is >= 2 the overflow happened: continue at
11748# fmul_ovfl_tst. otherwise finish through fmul_normal_exit.
11749fmul_may_ovfl:
11750	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11751
11752	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11753	fmov.l		&0x0,%fpsr		# clear FPSR
11754
11755	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11756
11757	fmov.l		%fpsr,%d1		# save status
11758	fmov.l		&0x0,%fpcr		# clear FPCR
11759
11760	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11761
11762	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept intact
11763	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
11764	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
11765
11766# no, it didn't overflow; we have correct result
11767	bra.w		fmul_normal_exit
11768
11769#
11770# UNDERFLOW:
11771# - the result of the multiply operation is an underflow.
11772# - do the multiply to the proper precision and rounding mode in order to
11773# set the inexact bits.
11774# - calculate the default result and return it in fp0.
11775# - if underflow or inexact is enabled, we need a multiply result rounded to
11776# extended precision. if the original operation was extended, then we have this
11777# result. if the original operation was single or double, we have to do another
11778# multiply using extended precision and the correct rounding mode. the result
11779# of this operation then has its exponent biased by +0x6000 to create the
11780# exceptional operand.
11781#
11782fmul_unfl:
11783	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11784
11785# this multiply deliberately uses only extended precision, round to zero;
11786# the unf_res() routine is then left to figure out all the rest of the
11787# default result.
11788	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11789
11790	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11791	fmov.l		&0x0,%fpsr		# clear FPSR
11792
11793	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11794
11795	fmov.l		%fpsr,%d1		# save status
11796	fmov.l		&0x0,%fpcr		# clear FPCR
11797
11798	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11799
11800	mov.b		FPCR_ENABLE(%a6),%d1
11801	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11802	bne.b		fmul_unfl_ena		# yes
11803
# fmul_unfl_dis: store fp0 to FP_SCR0 and let unf_res (a0 = result addr,
# d1 = rnd prec,mode) rewrite it in place; its d0 result is or'ed into the
# ccodes. fmul_unfl_ena branches back here after building the EXOP in fp1.
11804fmul_unfl_dis:
11805	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11806
11807	lea		FP_SCR0(%a6),%a0	# pass: result addr
11808	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11809	bsr.l		unf_res			# calculate default result
11810	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
11811	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11812	rts
11813
11814#
11815# UNFL or INEX is enabled: build the EXOP in fp1.
11816#
# The multiply is redone in fp1 (fp0 keeps the round-to-zero result needed
# by fmul_unfl_dis) using extended precision and the user's rounding mode.
# d0 still holds the scale factor.
11817fmul_unfl_ena:
11818	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11819
11820	mov.l		L_SCR3(%a6),%d1
11821	andi.b		&0xc0,%d1		# is precision extended?
11822	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
11823
11824# if the rnd mode is anything but RZ, then we have to re-do the above
11825# multiplication because we used RZ for all.
11826	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11827
11828fmul_unfl_ena_cont:
11829	fmov.l		&0x0,%fpsr		# clear FPSR
11830
11831	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11832
11833	fmov.l		&0x0,%fpcr		# clear FPCR
11834
# EXOP exponent = (exp - S.F. + 0x6000) & 0x7fff, original sign kept.
# FP_SCR0 is reused as scratch; the src operand stored there is overwritten.
11835	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11836	mov.l		%d2,-(%sp)		# save d2
11837	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11838	mov.l		%d1,%d2			# make a copy
11839	andi.l		&0x7fff,%d1		# strip sign
11840	andi.w		&0x8000,%d2		# keep old sign
11841	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
11842	addi.l		&0x6000,%d1		# add bias
11843	andi.w		&0x7fff,%d1		# clear sign bit
11844	or.w		%d2,%d1			# concat old sign,new exp
11845	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11846	mov.l		(%sp)+,%d2		# restore d2
11847	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11848	bra.w		fmul_unfl_dis
11849
# sgl/dbl case: load only the rounding-mode bits into the FPCR so the
# redo is performed with the precision field cleared.
11850fmul_unfl_ena_sd:
11851	mov.l		L_SCR3(%a6),%d1
11852	andi.b		&0x30,%d1		# use only rnd mode
11853	fmov.l		%d1,%fpcr		# set FPCR
11854
11855	bra.b		fmul_unfl_ena_cont
11856
11857# MAY UNDERFLOW:
11858# -use the correct rounding mode and precision. this code favors operations
11859# that do not underflow.
# The scaled |result| is compared with 2: greater means no underflow, less
# means underflow, and equality needs the round-to-zero recheck below.
11860fmul_may_unfl:
11861	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
11862
11863	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11864	fmov.l		&0x0,%fpsr		# clear FPSR
11865
11866	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
11867
11868	fmov.l		%fpsr,%d1		# save status
11869	fmov.l		&0x0,%fpcr		# clear FPCR
11870
11871	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
11872
11873	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept intact
11874	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
11875	fbgt.w		fmul_normal_exit	# > 2: no underflow occurred
11876	fblt.w		fmul_unfl		# < 2: underflow occurred
11877
11878#
11879# we still don't know if underflow occurred. result is ~ equal to 2. but,
11880# we don't know if the result was an underflow that rounded up to a 2 or
11881# a normalized number that rounded down to a 2. so, redo the entire operation
11882# using RZ as the rounding mode to see what the pre-rounded result is.
11883# this case should be relatively rare.
11884#
# the recheck is done entirely in fp1 so that fp0 still holds the correctly
# rounded result if control goes to fmul_normal_exit.
11885	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
11886
11887	mov.l		L_SCR3(%a6),%d1
11888	andi.b		&0xc0,%d1		# keep rnd prec
11889	ori.b		&rz_mode*0x10,%d1	# insert RZ
11890
11891	fmov.l		%d1,%fpcr		# set FPCR
11892	fmov.l		&0x0,%fpsr		# clear FPSR
11893
11894	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
11895
11896	fmov.l		&0x0,%fpcr		# clear FPCR
11897	fabs.x		%fp1			# make absolute value
11898	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
11899	fbge.w		fmul_normal_exit	# no; no underflow occurred
11900	bra.w		fmul_unfl		# yes, underflow occurred
11901
11902################################################################################
11903
11904#
11905# Multiply: inputs are not both normalized; what are they?
11906#
# d1 = (DTAG << 3) | STAG from fmul. It indexes tbl_fmul_op, whose 16-bit
# entries are offsets from the table base; the jmp adds the fetched offset
# back to the base. Each row of eight entries is one destination tag, each
# column one source tag (comments read "DST x SRC"). Unused slots hold
# offset 0, i.e. they point at the table itself.
11907fmul_not_norm:
11908	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
11909	jmp		(tbl_fmul_op.b,%pc,%d1.w)
11910
11911	swbeg		&48
11912tbl_fmul_op:
11913	short		fmul_norm	- tbl_fmul_op # NORM x NORM
11914	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
11915	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
11916	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
11917	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
11918	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
11919	short		tbl_fmul_op	- tbl_fmul_op #
11920	short		tbl_fmul_op	- tbl_fmul_op #
11921
11922	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
11923	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
11924	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
11925	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
11926	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
11927	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
11928	short		tbl_fmul_op	- tbl_fmul_op #
11929	short		tbl_fmul_op	- tbl_fmul_op #
11930
11931	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
11932	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
11933	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
11934	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
11935	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
11936	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
11937	short		tbl_fmul_op	- tbl_fmul_op #
11938	short		tbl_fmul_op	- tbl_fmul_op #
11939
11940	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
11941	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
11942	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
11943	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
11944	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
11945	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
11946	short		tbl_fmul_op	- tbl_fmul_op #
11947	short		tbl_fmul_op	- tbl_fmul_op #
11948
11949	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
11950	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
11951	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
11952	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
11953	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
11954	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
11955	short		tbl_fmul_op	- tbl_fmul_op #
11956	short		tbl_fmul_op	- tbl_fmul_op #
11957
11958	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
11959	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
11960	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
11961	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
11962	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
11963	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
11964	short		tbl_fmul_op	- tbl_fmul_op #
11965	short		tbl_fmul_op	- tbl_fmul_op #
11966
# Local trampolines: tbl_fmul_op stores 16-bit offsets, so the table targets
# these nearby labels, which then long-branch to the shared result routines.
11967fmul_res_operr:
11968	bra.l		res_operr
11969fmul_res_snan:
11970	bra.l		res_snan
11971fmul_res_qnan:
11972	bra.l		res_qnan
11973
11974#
11975# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11976#
# Returns a zero in fp0 whose sign is the xor of the operand signs (taken
# from the top byte of each operand's exponent word) and sets FPSR_CC to
# Z, or Z+N for a negative zero. Clobbers d0 and d1.
11977	global		fmul_zero		# global for fsglmul
11978fmul_zero:
11979	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11980	mov.b		DST_EX(%a1),%d1
11981	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
11982	bpl.b		fmul_zero_p		# result ZERO is pos.
11983fmul_zero_n:
11984	fmov.s		&0x80000000,%fp0	# load -ZERO
11985	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11986	rts
11987fmul_zero_p:
11988	fmov.s		&0x00000000,%fp0	# load +ZERO
11989	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11990	rts
11991
11992#
11993# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11994#
11995# Note: The j-bit for an infinity is a don't-care. However, to be
11996# strictly compatible w/ the 68881/882, we make sure to return an
11997# INF w/ the j-bit set if the input INF j-bit was set. Destination
11998# INFs take priority.
11999#
# fmul_inf_dst returns the destination operand as the INF, fmul_inf_src the
# source operand. Both give it the xor of the operand signs and share the
# fmul_inf_dst_n / fmul_inf_dst_p tails, which set INF (and N) in FPSR_CC.
# Clobbers d0 and d1.
12000	global		fmul_inf_dst		# global for fsglmul
12001fmul_inf_dst:
12002	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
12003	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
12004	mov.b		DST_EX(%a1),%d1
12005	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
12006	bpl.b		fmul_inf_dst_p		# result INF is pos.
12007fmul_inf_dst_n:
12008	fabs.x		%fp0			# clear result sign
12009	fneg.x		%fp0			# set result sign
12010	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12011	rts
12012fmul_inf_dst_p:
12013	fabs.x		%fp0			# clear result sign
12014	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
12015	rts
12016
12017	global		fmul_inf_src		# global for fsglmul
12018fmul_inf_src:
12019	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
12020	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
12021	mov.b		DST_EX(%a1),%d1
12022	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
12023	bpl.b		fmul_inf_dst_p		# result INF is pos.
12024	bra.b		fmul_inf_dst_n		# result INF is neg.
12025
12026#########################################################################
12027# XDEF ****************************************************************	#
12028#	fin(): emulates the fmove instruction				#
12029#	fsin(): emulates the fsmove instruction				#
12030#	fdin(): emulates the fdmove instruction				#
12031#									#
12032# XREF ****************************************************************	#
12033#	norm() - normalize mantissa for EXOP on denorm			#
12034#	scale_to_zero_src() - scale src exponent to zero		#
12035#	ovf_res() - return default overflow result			#
12036# 	unf_res() - return default underflow result			#
12037#	res_qnan_1op() - return QNAN result				#
12038#	res_snan_1op() - return SNAN result				#
12039#									#
12040# INPUT ***************************************************************	#
12041#	a0 = pointer to extended precision source operand		#
12042#	d0 = round prec/mode						#
12043# 									#
12044# OUTPUT **************************************************************	#
12045#	fp0 = result							#
12046#	fp1 = EXOP (if exception occurred)				#
12047#									#
12048# ALGORITHM ***********************************************************	#
12049# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
12050# norms into extended, single, and double precision.			#
12051# 	Norms can be emulated w/ a regular fmove instruction. For	#
12052# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
12053# if the result would have overflowed/underflowed. If so, use unf_res()	#
12054# or ovf_res() to return the default result. Also return EXOP if	#
12055# exception is enabled. If no exception, return the default result.	#
12056#	Unnorms don't pass through here.				#
12057#									#
12058#########################################################################
12059
# fsin: single-precision move-in entry. Forces the precision field of the
# rnd prec/mode byte in d0 to single, then joins the common fin code.
12060	global		fsin
12061fsin:
12062	andi.b		&0x30,%d0		# keep bits 5:4 only (clears rnd prec)
12063	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12064	bra.b		fin
12065
# fdin: double-precision move-in entry. Forces the precision field of the
# rnd prec/mode byte in d0 to double, then falls through into fin.
12066	global		fdin
12067fdin:
12068	andi.b		&0x30,%d0		# keep bits 5:4 only (clears rnd prec)
12069	ori.b		&d_mode*0x10,%d0	# insert dbl precision
12070
# fin: common move-in entry (fsin branches here, fdin falls in).
# a0 = source operand, d0 = rnd prec/mode, STAG(a6) = source operand tag.
# A zero tag falls through to fin_norm; other tags go to fin_not_norm with
# the tag in d1.
12071	global		fin
12072fin:
12073	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12074
12075	mov.b		STAG(%a6),%d1		# fetch src optype tag
12076	bne.w		fin_not_norm		# optimize on non-norm input
12077
12078#
12079# FP MOVE IN: NORMs and DENORMs ONLY!
12080#
# the andi leaves only the precision bits in d0.b; fin_not_ext compares
# that value to tell single from double.
12081fin_norm:
12082	andi.b		&0xc0,%d0		# is precision extended?
12083	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12084
12085#
12086# precision selected is extended. so...we cannot get an underflow
12087# or overflow because of rounding to the correct precision. so...
12088# skip the scaling and unscaling...
12089#
12090	tst.b		SRC_EX(%a0)		# is the operand negative?
12091	bpl.b		fin_norm_done		# no
12092	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12093fin_norm_done:
12094	fmovm.x		SRC(%a0),&0x80		# return result in fp0
12095	rts
12096
12097#
12098# for an extended precision DENORM, the UNFL exception bit is set
12099# the accrued bit is NOT set in this instance(no inexactness!)
12100#
# sgl/dbl DENORMs are handed to fin_not_ext with only the precision bits
# left in d0.b.
12101fin_denorm:
12102	andi.b		&0xc0,%d0		# is precision extended?
12103	bne.w		fin_not_ext		# no, so go handle dbl or sgl
12104
12105	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12106	tst.b		SRC_EX(%a0)		# is the operand negative?
12107	bpl.b		fin_denorm_done		# no
12108	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
12109fin_denorm_done:
12110	fmovm.x		SRC(%a0),&0x80		# return result in fp0
12111	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12112	bne.b		fin_denorm_unfl_ena	# yes
12113	rts
12114
12115#
12116# the input is an extended DENORM and underflow is enabled in the FPCR.
12117# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12118# exponent and insert back into the operand.
12119#
# works on a copy in FP_SCR0; a0 is repointed at that copy for norm(),
# whose d0 result is used as the shift count. EXOP is returned in fp1.
12120fin_denorm_unfl_ena:
12121	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12122	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12123	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12124	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12125	bsr.l		norm			# normalize result
12126	neg.w		%d0			# new exponent = -(shft val)
12127	addi.w		&0x6000,%d0		# add new bias to exponent
12128	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12129	andi.w		&0x8000,%d1		# keep old sign
12130	andi.w		&0x7fff,%d0		# clear sign position
12131	or.w		%d1,%d0			# concat new exp,old sign
12132	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12133	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12134	rts
12135
12136#
12137# operand is to be rounded to single or double precision
12138#
# d0.b holds only the precision bits here (masked by fin_norm/fin_denorm).
12139fin_not_ext:
12140	cmpi.b		%d0,&s_mode*0x10 	# separate sgl/dbl prec
12141	bne.b		fin_dbl
12142
12143#
12144# operand is to be rounded to single precision
12145#
# copy the operand to FP_SCR0, scale it, and classify the move by comparing
# the scale factor in d0 with the single-precision limits.
12146fin_sgl:
12147	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12148	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12149	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12150	bsr.l		scale_to_zero_src	# calculate scale factor
12151
12152	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12153	bge.w		fin_sd_unfl		# yes; go handle underflow
12154	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12155	beq.w		fin_sd_may_ovfl		# maybe; go check
12156	blt.w		fin_sd_ovfl		# yes; go handle overflow
12157
12158#
12159# operand will NOT overflow or underflow when moved into the fp reg file
12160#
# d0 (scale factor) must survive to fin_sd_normal_exit, so the FPSR is
# captured in d1.
12161fin_sd_normal:
12162	fmov.l		&0x0,%fpsr		# clear FPSR
12163	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12164
12165	fmov.x		FP_SCR0(%a6),%fp0	# perform move
12166
12167	fmov.l		%fpsr,%d1		# save FPSR
12168	fmov.l		&0x0,%fpcr		# clear FPCR
12169
12170	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12171
# fin_sd_normal_exit: replace the exponent of the rounded result with
# (exponent - d0), keeping the sign, and return it in fp0. Also entered
# from fin_sd_may_ovfl when no overflow occurred.
12172fin_sd_normal_exit:
12173	mov.l		%d2,-(%sp)		# save d2
12174	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12175	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12176	mov.w		%d1,%d2			# make a copy
12177	andi.l		&0x7fff,%d1		# strip sign
12178	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
12179	andi.w		&0x8000,%d2		# keep old sign
12180	or.w		%d1,%d2			# concat old sign,new exponent
12181	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12182	mov.l		(%sp)+,%d2		# restore d2
12183	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12184	rts
12185
12186#
12187# operand is to be rounded to double precision
12188#
# same flow as fin_sgl but with the double-precision limits; the common
# sgl/dbl handlers (fin_sd_*) are shared.
12189fin_dbl:
12190	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12191	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12192	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12193	bsr.l		scale_to_zero_src	# calculate scale factor
12194
12195	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12196	bge.w		fin_sd_unfl		# yes; go handle underflow
12197	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12198	beq.w		fin_sd_may_ovfl		# maybe; go check
12199	blt.w		fin_sd_ovfl		# yes; go handle overflow
12200	bra.w		fin_sd_normal		# no; go handle normalized op
12201
12202#
12203# operand WILL underflow when moved in to the fp register file
12204#
# FP_SCR0 holds the scaled operand and d0 the scale factor on entry.
12205fin_sd_unfl:
12206	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12207
12208	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
12209	bpl.b		fin_sd_unfl_tst
12210	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12211
12212# if underflow or inexact is enabled, then go calculate the EXOP first.
12213fin_sd_unfl_tst:
12214	mov.b		FPCR_ENABLE(%a6),%d1
12215	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12216	bne.b		fin_sd_unfl_ena		# yes
12217
# unf_res is called with a0 = operand in FP_SCR0 and d1 = rnd prec,mode;
# its d0 result is or'ed into the ccodes and FP_SCR0 is loaded as result.
12218fin_sd_unfl_dis:
12219	lea		FP_SCR0(%a6),%a0	# pass: result addr
12220	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12221	bsr.l		unf_res			# calculate default result
12222	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
12223	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12224	rts
12225
12226#
12227# operand will underflow AND underflow or inexact is enabled.
12228# therefore, we must return the result rounded to extended precision.
12229#
# the EXOP is built in FP_SCR1 (mantissa copied unchanged, exponent =
# (exp - S.F. + 0x6000) & 0x7fff with the old sign) so that FP_SCR0 stays
# intact for fin_sd_unfl_dis.
12230fin_sd_unfl_ena:
12231	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12232	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12233	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12234
12235	mov.l		%d2,-(%sp)		# save d2
12236	mov.w		%d1,%d2			# make a copy
12237	andi.l		&0x7fff,%d1		# strip sign
12238	sub.l		%d0,%d1			# subtract scale factor
12239	andi.w		&0x8000,%d2		# extract old sign
12240	addi.l		&0x6000,%d1		# add new bias
12241	andi.w		&0x7fff,%d1		# clear sign bit
12242	or.w		%d1,%d2			# concat old sign,new exp
12243	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
12244	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12245	mov.l		(%sp)+,%d2		# restore d2
12246	bra.b		fin_sd_unfl_dis
12247
12248#
12249# operand WILL overflow.
12250#
# the move is still executed at the requested precision so that the FPSR
# status (INEX2,N) can be merged into USER_FPSR. d0 = scale factor.
12251fin_sd_ovfl:
12252	fmov.l		&0x0,%fpsr		# clear FPSR
12253	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12254
12255	fmov.x		FP_SCR0(%a6),%fp0	# perform move
12256
12257	fmov.l		&0x0,%fpcr		# clear FPCR
12258	fmov.l		%fpsr,%d1		# save FPSR
12259
12260	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12261
# fin_sd_may_ovfl joins here once it has confirmed the overflow.
12262fin_sd_ovfl_tst:
12263	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12264
12265	mov.b		FPCR_ENABLE(%a6),%d1
12266	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12267	bne.b		fin_sd_ovfl_ena		# yes
12268
12269#
12270# OVFL is not enabled; therefore, we must create the default result by
12271# calling ovf_res().
12272#
# ovf_res is called with d1 = sign flag and d0 = prec,mode; the default
# result is loaded from the address it leaves in a0.
12273fin_sd_ovfl_dis:
12274	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12275	sne		%d1			# set sign param accordingly
12276	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12277	bsr.l		ovf_res			# calculate default result
12278	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12279	fmovm.x		(%a0),&0x80		# return default result in fp0
12280	rts
12281
12282#
12283# OVFL or INEX is enabled.
12284# the INEX2 bit has already been updated by the round to the correct precision.
12285# now, round to extended(and don't alter the FPSR).
12286#
# NOTE(review): no rounding instruction is issued on this path; the code
# only rebiases the exponent of the operand held in FP_SCR0
# ((exp - S.F. - 0x6000) & 0x7fff, old sign kept) and returns it in fp1.
12287fin_sd_ovfl_ena:
12288	mov.l		%d2,-(%sp)		# save d2
12289	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12290	mov.l		%d1,%d2			# make a copy
12291	andi.l		&0x7fff,%d1		# strip sign
12292	andi.w		&0x8000,%d2		# keep old sign
12293	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
12294	sub.l		&0x6000,%d1		# subtract bias
12295	andi.w		&0x7fff,%d1		# clear sign bit
12296	or.w		%d2,%d1			# concat old sign,new exp
12297	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12298	mov.l		(%sp)+,%d2		# restore d2
12299	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12300	bra.b		fin_sd_ovfl_dis
12301
12302#
12303# the move in MAY overflow. so...
12304#
# perform the rounded move on the scaled operand; if the scaled |result|
# reaches 2 the rounding caused an overflow (continue at fin_sd_ovfl_tst),
# otherwise finish through fin_sd_normal_exit. d0 = scale factor throughout.
12305fin_sd_may_ovfl:
12306	fmov.l		&0x0,%fpsr		# clear FPSR
12307	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12308
12309	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
12310
12311	fmov.l		%fpsr,%d1		# save status
12312	fmov.l		&0x0,%fpcr		# clear FPCR
12313
12314	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12315
12316	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept intact
12317	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
12318	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
12319
12320# no, it didn't overflow; we have correct result
12321	bra.w		fin_sd_normal_exit
12322
12323##########################################################################
12324
12325#
12326# operand is not a NORM: check its optype and branch accordingly
12327#
# d1 = source tag loaded by fin. The NAN cases leave through res_snan_1op /
# res_qnan_1op via conditional long branches.
12328fin_not_norm:
12329	cmpi.b		%d1,&DENORM		# weed out DENORM
12330	beq.w		fin_denorm
12331	cmpi.b		%d1,&SNAN		# weed out SNANs
12332	beq.l		res_snan_1op
12333	cmpi.b		%d1,&QNAN		# weed out QNANs
12334	beq.l		res_qnan_1op
12335
12336#
12337# do the fmove in; at this point, only possible ops are ZERO and INF.
12338# use fmov to determine ccodes.
12339# prec:mode should be zero at this point but it won't affect answer anyways.
12340#
12341	fmov.x		SRC(%a0),%fp0		# do fmove in
12342	fmov.l		%fpsr,%d0		# no exceptions possible
12343	rol.l		&0x8,%d0		# rotate FPSR top byte (ccodes) into lo byte
12344	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
12345	rts
12346
12347#########################################################################
12348# XDEF ****************************************************************	#
12349# 	fdiv(): emulates the fdiv instruction				#
12350#	fsdiv(): emulates the fsdiv instruction				#
12351#	fddiv(): emulates the fddiv instruction				#
12352#									#
12353# XREF ****************************************************************	#
12354#	scale_to_zero_src() - scale src exponent to zero		#
12355#	scale_to_zero_dst() - scale dst exponent to zero		#
12356#	unf_res() - return default underflow result			#
12357#	ovf_res() - return default overflow result			#
12358# 	res_qnan() - return QNAN result					#
12359# 	res_snan() - return SNAN result					#
12360#									#
12361# INPUT ***************************************************************	#
12362#	a0 = pointer to extended precision source operand		#
12363#	a1 = pointer to extended precision destination operand		#
12364#	d0 = rnd prec,mode						#
12365#									#
12366# OUTPUT **************************************************************	#
12367#	fp0 = result							#
12368#	fp1 = EXOP (if exception occurred)				#
12369#									#
12370# ALGORITHM ***********************************************************	#
12371#	Handle NANs, infinities, and zeroes as special cases. Divide	#
12372# norms/denorms into ext/sgl/dbl precision.				#
12373#	For norms/denorms, scale the exponents such that a divide	#
12374# instruction won't cause an exception. Use the regular fdiv to		#
12375# compute a result. Check if the regular operands would have taken	#
12376# an exception. If so, return the default overflow/underflow result	#
12377# and return the EXOP if exceptions are enabled. Else, scale the 	#
12378# result operand to the proper exponent.				#
12379#									#
12380#########################################################################
12381
# Scale-factor thresholds for fdiv, one long per rounding precision
# (extended, single, double). fdiv_norm indexes both tables with the
# precision field of L_SCR3 shifted down to the low bits.
12382	align		0x10
12383tbl_fdiv_unfl:
12384	long		0x3fff - 0x0000		# ext_unfl
12385	long		0x3fff - 0x3f81		# sgl_unfl
12386	long		0x3fff - 0x3c01		# dbl_unfl
12387
12388tbl_fdiv_ovfl:
12389	long		0x3fff - 0x7ffe		# ext overflow exponent
12390	long		0x3fff - 0x407e		# sgl overflow exponent
12391	long		0x3fff - 0x43fe		# dbl overflow exponent
12392
# fsdiv: single-precision divide entry. Forces the precision field of the
# rnd prec/mode byte in d0 to single, then joins the common fdiv code.
12393	global		fsdiv
12394fsdiv:
12395	andi.b		&0x30,%d0		# keep bits 5:4 only (clears rnd prec)
12396	ori.b		&s_mode*0x10,%d0	# insert sgl prec
12397	bra.b		fdiv
12398
# fddiv: double-precision divide entry. Forces the precision field of the
# rnd prec/mode byte in d0 to double, then falls through into fdiv.
12399	global		fddiv
12400fddiv:
12401	andi.b		&0x30,%d0		# keep bits 5:4 only (clears rnd prec)
12402	ori.b		&d_mode*0x10,%d0	# insert dbl prec
12403
# fdiv: common divide entry (fsdiv branches here, fddiv falls in).
# a0 = source operand, a1 = destination operand, d0 = rnd prec,mode.
# The operand tags are packed as (DTAG << 3) | STAG: zero falls through to
# fdiv_norm, anything else goes to fdiv_not_norm with the value in d1.
12404	global		fdiv
12405fdiv:
12406	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12407
12408	clr.w		%d1
12409	mov.b		DTAG(%a6),%d1
12410	lsl.b		&0x3,%d1		# dst tag -> bits 5:3
12411	or.b		STAG(%a6),%d1		# combine src tags
12412
12413	bne.w		fdiv_not_norm		# optimize on non-norm input
12414
12415#
12416# DIVIDE: NORMs and DENORMs ONLY!
12417#
# copy both operands into FP_SCR0 (src) / FP_SCR1 (dst), scale them, and
# classify the quotient by comparing the combined scale factor with the
# per-precision entries of tbl_fdiv_ovfl and tbl_fdiv_unfl.
12418fdiv_norm:
12419	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
12420	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
12421	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
12422
12423	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12424	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12425	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12426
12427	bsr.l		scale_to_zero_src	# scale src exponent
12428	mov.l		%d0,-(%sp)		# save scale factor 1
12429
12430	bsr.l		scale_to_zero_dst	# scale dst exponent
12431
12432	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
12433	add.l		%d0,(%sp)		# (dst scale minus src scale)
12434
12435	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
12436	lsr.b		&0x6,%d1		# prec field (bits 7:6) -> table index
12437	mov.l		(%sp)+,%d0		# load S.F.
12438	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12439	ble.w		fdiv_may_ovfl		# result may overflow; go check
12440
12441	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12442	beq.w		fdiv_may_unfl		# maybe
12443	bgt.w		fdiv_unfl		# yes; go handle underflow
12444
# fdiv_normal: the quotient will neither overflow nor underflow. Divide the
# scaled operands at the requested precision/mode, merge the FPSR status
# into USER_FPSR, then unscale. d0 (scale factor) is preserved for the exit.
12445fdiv_normal:
12446	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12447
12448	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12449	fmov.l		&0x0,%fpsr		# clear FPSR
12450
12451	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
12452
12453	fmov.l		%fpsr,%d1		# save FPSR
12454	fmov.l		&0x0,%fpcr		# clear FPCR
12455
12456	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12457
# fdiv_normal_exit: replace the result exponent with (exponent - d0),
# keeping the sign, and return the value in fp0. Also reached from
# fdiv_no_ovfl.
12458fdiv_normal_exit:
12459	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
12460	mov.l		%d2,-(%sp)		# store d2
12461	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
12462	mov.l		%d1,%d2			# make a copy
12463	andi.l		&0x7fff,%d1		# strip sign
12464	andi.w		&0x8000,%d2		# keep old sign
12465	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
12466	or.w		%d2,%d1			# concat old sign,new exp
12467	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12468	mov.l		(%sp)+,%d2		# restore d2
12469	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12470	rts
12471
# Per-precision limits (ext, sgl, dbl order, same index as the other fdiv
# tables) that fdiv_may_ovfl compares the unscaled result exponent against.
12472tbl_fdiv_ovfl2:
12473	long		0x7fff
12474	long		0x407f
12475	long		0x43ff
12476
# fdiv_no_ovfl: fdiv_may_ovfl found no overflow. Pop the scale factor it
# pushed back into d0 and finish through the normal unscaling exit.
12477fdiv_no_ovfl:
12478	mov.l		(%sp)+,%d0		# restore scale factor
12479	bra.b		fdiv_normal_exit
12480
# fdiv_may_ovfl: the quotient may overflow. Do the divide, then compute the
# unscaled result exponent and compare it with tbl_fdiv_ovfl2 to decide.
# d1 must still hold the precision index set up in fdiv_norm, so the FPSR
# is captured in d0 here and the scale factor is parked on the stack.
12481fdiv_may_ovfl:
12482	mov.l		%d0,-(%sp)		# save scale factor
12483
12484	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12485
12486	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12487	fmov.l		&0x0,%fpsr		# clear FPSR
12488
12489	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12490
12491	fmov.l		%fpsr,%d0		# save status
12492	fmov.l		&0x0,%fpcr		# clear FPCR
12493
12494	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
12495
12496	fmovm.x		&0x01,-(%sp)		# save result to stack
12497	mov.w		(%sp),%d0		# fetch new exponent
12498	add.l		&0xc,%sp		# clear result from stack
12499	andi.l		&0x7fff,%d0		# strip sign
12500	sub.l		(%sp),%d0		# apply scale factor (exp - S.F.)
12501	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # below the limit?
12502	blt.b		fdiv_no_ovfl		# yes; no overflow occurred
12503	mov.l		(%sp)+,%d0		# overflow: restore scale factor
12504
# fdiv_ovfl_tst: overflow confirmed (fallen into from fdiv_may_ovfl).
# Record the exception bits, then either build the EXOP (OVFL or INEX
# enabled) or go straight to the default result.
12505fdiv_ovfl_tst:
12506	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12507
12508	mov.b		FPCR_ENABLE(%a6),%d1
12509	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12510	bne.b		fdiv_ovfl_ena		# yes
12511
# ovf_res is called with d1 = sign flag and d0 = prec:rnd; the default
# result is loaded from the address it leaves in a0.
12512fdiv_ovfl_dis:
12513	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative?
12514	sne		%d1			# set sign param accordingly
12515	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
12516	bsr.l		ovf_res			# calculate default result
12517	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
12518	fmovm.x		(%a0),&0x80		# return default result in fp0
12519	rts
12520
12521fdiv_ovfl_ena:
12522	mov.l		L_SCR3(%a6),%d1
12523	andi.b		&0xc0,%d1		# is precision extended?
12524	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
12525
12526fdiv_ovfl_ena_cont:
12527	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
12528
12529	mov.l		%d2,-(%sp)		# save d2
12530	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12531	mov.w		%d1,%d2			# make a copy
12532	andi.l		&0x7fff,%d1		# strip sign
12533	sub.l		%d0,%d1			# add scale factor
12534	subi.l		&0x6000,%d1		# subtract bias
12535	andi.w		&0x7fff,%d1		# clear sign bit
12536	andi.w		&0x8000,%d2		# keep old sign
12537	or.w		%d2,%d1			# concat old sign,new exp
12538	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12539	mov.l		(%sp)+,%d2		# restore d2
12540	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12541	bra.b		fdiv_ovfl_dis
12542
12543fdiv_ovfl_ena_sd:
12544	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
12545
12546	mov.l		L_SCR3(%a6),%d1
12547	andi.b		&0x30,%d1		# keep rnd mode
12548	fmov.l		%d1,%fpcr		# set FPCR
12549
12550	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12551
12552	fmov.l		&0x0,%fpcr		# clear FPCR
12553	bra.b		fdiv_ovfl_ena_cont
12554
12555fdiv_unfl:
# Underflow handling. Set the UNFL exception bit, redo the divide with
# FPCR = rz_mode*0x10 only, merge the resulting FPSR into USER_FPSR, and
# then branch on whether UNFL or INEX is enabled.
12556	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12557
12558	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12559
12560	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12561	fmov.l		&0x0,%fpsr		# clear FPSR
12562
12563	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12564
12565	fmov.l		%fpsr,%d1		# save status
12566	fmov.l		&0x0,%fpcr		# clear FPCR
12567
12568	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12569
12570	mov.b		FPCR_ENABLE(%a6),%d1
12571	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12572	bne.b		fdiv_unfl_ena		# yes
12573
12574fdiv_unfl_dis:
# Default result: spill fp0 to FP_SCR0 and let unf_res() work on it in
# place, given the rounding info from L_SCR3. The byte returned in d0 is
# ORed into the ccodes and FP_SCR0 is returned in fp0.
12575	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12576
12577	lea		FP_SCR0(%a6),%a0	# pass: result addr
12578	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12579	bsr.l		unf_res			# calculate default result
12580	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
12581	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12582	rts
12583
12584#
12585# UNFL is enabled.
12586#
12587fdiv_unfl_ena:
# Build the EXOP in fp1: repeat the divide into fp1 (FPCR = L_SCR3 for
# extended precision, rounding mode only for sgl/dbl), then subtract the
# scale factor (d0) from the exponent and add the 0x6000 bias.
12588	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
12589
12590	mov.l		L_SCR3(%a6),%d1
12591	andi.b		&0xc0,%d1		# is precision extended?
12592	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
12593
12594	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12595
12596fdiv_unfl_ena_cont:
12597	fmov.l		&0x0,%fpsr		# clear FPSR
12598
12599	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12600
12601	fmov.l		&0x0,%fpcr		# clear FPCR
12602
12603	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
12604	mov.l		%d2,-(%sp)		# save d2
12605	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12606	mov.l		%d1,%d2			# make a copy
12607	andi.l		&0x7fff,%d1		# strip sign
12608	andi.w		&0x8000,%d2		# keep old sign
12609	sub.l		%d0,%d1			# exp -= scale factor (d0)
12610	addi.l		&0x6000,%d1		# add bias
12611	andi.w		&0x7fff,%d1		# wrap exponent to 15 bits
12612	or.w		%d2,%d1			# concat old sign,new exp
12613	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
12614	mov.l		(%sp)+,%d2		# restore d2
12615	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12616	bra.w		fdiv_unfl_dis		# now build the default result
12617
12618fdiv_unfl_ena_sd:
12619	mov.l		L_SCR3(%a6),%d1
12620	andi.b		&0x30,%d1		# use only rnd mode
12621	fmov.l		%d1,%fpcr		# set FPCR
12622
12623	bra.b		fdiv_unfl_ena_cont
12624
12625#
12626# the divide operation MAY underflow:
12627#
12628fdiv_may_unfl:
# Perform the divide under the caller's rounding precision/mode (L_SCR3)
# and compare |result| with 1: above 1 means no underflow, below 1 means
# underflow, and exactly 1 needs the second, round-to-zero divide below.
12629	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
12630
12631	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12632	fmov.l		&0x0,%fpsr		# clear FPSR
12633
12634	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
12635
12636	fmov.l		%fpsr,%d1		# save status
12637	fmov.l		&0x0,%fpcr		# clear FPCR
12638
12639	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12640
12641	fabs.x		%fp0,%fp1		# make a copy of result
12642	fcmp.b		%fp1,&0x1		# compare |result| with 1
12643	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
12644	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred
12645
12646#
12647# we still don't know if underflow occurred. result is ~ equal to 1. but,
12648# we don't know if the result was an underflow that rounded up to a 1
12649# or a normalized number that rounded down to a 1. so, redo the entire
12650# operation using RZ as the rounding mode to see what the pre-rounded
12651# result is. this case should be relatively rare.
12652#
12653	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
12654
12655	mov.l		L_SCR3(%a6),%d1
12656	andi.b		&0xc0,%d1		# keep rnd prec
12657	ori.b		&rz_mode*0x10,%d1	# insert RZ
12658
12659	fmov.l		%d1,%fpcr		# set FPCR
12660	fmov.l		&0x0,%fpsr		# clear FPSR
12661
12662	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
12663
12664	fmov.l		&0x0,%fpcr		# clear FPCR
12665	fabs.x		%fp1			# make absolute value
12666	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
12667	fbge.w		fdiv_normal_exit	# no; no underflow occurred
12668	bra.w		fdiv_unfl		# yes; underflow occurred
12669
12670############################################################################
12671
12672#
12673# Divide: inputs are not both normalized; what are they?
12674#
12675fdiv_not_norm:
# Dispatch through tbl_fdiv_op: d1 selects a 16-bit entry, which is an
# offset from tbl_fdiv_op to the handler. Each entry's comment reads
# "dst type / src type". The table has 6 groups of 8 entries; the last
# two entries of every group hold offset 0.
# NOTE(review): d1 is built outside this excerpt (presumably from the
# operand tags) - confirm that the offset-0 slots can never be selected.
12676	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
12677	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)	# jump to tbl_fdiv_op+offset
12678
12679	swbeg		&48
12680tbl_fdiv_op:
12681	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
12682	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
12683	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
12684	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
12685	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
12686	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
12687	short		tbl_fdiv_op	- tbl_fdiv_op #
12688	short		tbl_fdiv_op	- tbl_fdiv_op #
12689
12690	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
12691	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
12692	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
12693	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
12694	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
12695	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
12696	short		tbl_fdiv_op	- tbl_fdiv_op #
12697	short		tbl_fdiv_op	- tbl_fdiv_op #
12698
12699	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
12700	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
12701	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
12702	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
12703	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
12704	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
12705	short		tbl_fdiv_op	- tbl_fdiv_op #
12706	short		tbl_fdiv_op	- tbl_fdiv_op #
12707
12708	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
12709	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
12710	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
12711	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
12712	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
12713	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
12714	short		tbl_fdiv_op	- tbl_fdiv_op #
12715	short		tbl_fdiv_op	- tbl_fdiv_op #
12716
12717	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
12718	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
12719	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
12720	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
12721	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
12722	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
12723	short		tbl_fdiv_op	- tbl_fdiv_op #
12724	short		tbl_fdiv_op	- tbl_fdiv_op #
12725
12726	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
12727	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
12728	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
12729	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
12730	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
12731	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
12732	short		tbl_fdiv_op	- tbl_fdiv_op #
12733	short		tbl_fdiv_op	- tbl_fdiv_op #
12734
# Local trampolines: the table entries above are 16-bit offsets, so the
# shared NAN/OPERR handlers are reached through these long branches.
12735fdiv_res_qnan:
12736	bra.l		res_qnan
12737fdiv_res_snan:
12738	bra.l		res_snan
12739fdiv_res_operr:
12740	bra.l		res_operr
12741
12742	global		fdiv_zero_load		# exported: fsgldiv shares this stub
12743fdiv_zero_load:
12744	mov.b		DST_EX(%a1),%d1		# d1 = dst sign/exponent hi byte
12745	mov.b		SRC_EX(%a0),%d0		# d0 = src sign/exponent hi byte
12746	eor.b		%d0,%d1			# bit 7 of d1 = sign(src) ^ sign(dst)
12747	bpl.b		fdiv_zero_load_p	# signs agree; quotient is +ZERO
12748	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# ccodes: Z and N
12749	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
12750	rts
12751fdiv_zero_load_p:
12752	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes: Z only
12753	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
12754	rts
12755
12756#
12757# In Range (NORM or DENORM) destination divided by a ZERO source: flag
12758# divide-by-zero and hand back an infinity whose sign is the exclusive-or
12759# of the operand signs. A new INF is loaded (w/ the j-bit cleared).
12760#
12761	global		fdiv_inf_load		# exported: fsgldiv shares this stub
12762fdiv_inf_load:
12763	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # record DZ and ADZ
12764	mov.b		DST_EX(%a1),%d1		# d1 = dst sign/exponent hi byte
12765	mov.b		SRC_EX(%a0),%d0		# d0 = src sign/exponent hi byte
12766	eor.b		%d0,%d1			# bit 7 of d1 = result sign
12767	bpl.b		fdiv_inf_load_p		# signs agree; result is +INF
12768	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # ccodes: INF and N
12769	fmov.s		&0xff800000,%fp0	# fp0 = -INF
12770	rts
12771fdiv_inf_load_p:
12772	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccodes: INF only
12773	fmov.s		&0x7f800000,%fp0	# fp0 = +INF
12774	rts
12775
12776#
12777# The destination was an INF w/ an In Range or ZERO source, the result is
12778# an INF w/ the proper sign.
12779# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12780# dst INF is set, then the j-bit of the result INF is also set).
12781#
12782	global		fdiv_inf_dst		# global for fsgldiv
12783fdiv_inf_dst:
12784	mov.b		DST_EX(%a1),%d0		# load both signs
12785	mov.b		SRC_EX(%a0),%d1
12786	eor.b		%d0,%d1			# bit 7 = result sign
12787	bpl.b		fdiv_inf_dst_p		# result is positive
12788
12789	fmovm.x		DST(%a1),&0x80		# return result in fp0
12790	fabs.x		%fp0			# clear sign bit
12791	fneg.x		%fp0			# set sign bit
12792	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12793	rts
12794
12795fdiv_inf_dst_p:
12796	fmovm.x		DST(%a1),&0x80		# return result in fp0
12797	fabs.x		%fp0			# return positive INF
12798	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
12799	rts
12800
12801#########################################################################
12802# XDEF ****************************************************************	#
12803#	fneg(): emulates the fneg instruction				#
12804#	fsneg(): emulates the fsneg instruction				#
12805#	fdneg(): emulates the fdneg instruction				#
12806#									#
12807# XREF ****************************************************************	#
12808# 	norm() - normalize a denorm to provide EXOP			#
12809#	scale_to_zero_src() - scale sgl/dbl source exponent		#
12810#	ovf_res() - return default overflow result			#
12811#	unf_res() - return default underflow result			#
12812# 	res_qnan_1op() - return QNAN result				#
12813#	res_snan_1op() - return SNAN result				#
12814#									#
12815# INPUT ***************************************************************	#
12816#	a0 = pointer to extended precision source operand		#
12817#	d0 = rnd prec,mode						#
12818#									#
12819# OUTPUT **************************************************************	#
12820#	fp0 = result							#
12821#	fp1 = EXOP (if exception occurred)				#
12822#									#
12823# ALGORITHM ***********************************************************	#
12824#	Handle NANs, zeroes, and infinities as special cases. Separate	#
12825# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
12826# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
12827# and an actual fneg performed to see if overflow/underflow would have	#
12828# occurred. If so, return default underflow/overflow result. Else,	#
12829# scale the result exponent and return result. FPSR gets set based on	#
12830# the result value.							#
12831#									#
12832#########################################################################
12833
12834	global		fsneg
12835fsneg:					# fneg with rnd prec forced to single
12836	andi.b		&0x30,%d0		# clear rnd prec
12837	ori.b		&s_mode*0x10,%d0	# insert sgl precision
12838	bra.b		fneg
12839
12840	global		fdneg
12841fdneg:					# fneg with rnd prec forced to double
12842	andi.b		&0x30,%d0		# clear rnd prec
12843	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls into fneg
12844
12845	global		fneg
12846fneg:
# Entry: a0 = pointer to the source operand, d0 = rnd prec,mode.
# The rounding info is saved in L_SCR3; a non-zero source tag (STAG)
# diverts to fneg_not_norm.
12847	mov.l		%d0,L_SCR3(%a6)		# store rnd info
12848	mov.b		STAG(%a6),%d1
12849	bne.w		fneg_not_norm		# optimize on non-norm input
12850
12851#
12852# NEGATE SIGN : norms and denorms ONLY!
12853#
12854fneg_norm:
12855	andi.b		&0xc0,%d0		# is precision extended?
12856	bne.w		fneg_not_ext		# no; go handle sgl or dbl
12857
12858#
12859# precision selected is extended. so...we can not get an underflow
12860# or overflow because of rounding to the correct precision. so...
12861# skip the scaling and unscaling...
12862#
12863	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy mantissa unchanged
12864	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12865	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
12866	eori.w		&0x8000,%d0		# negate sign
12867	bpl.b		fneg_norm_load		# sign is positive
12868	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
12869fneg_norm_load:
12870	mov.w		%d0,FP_SCR0_EX(%a6)	# store flipped {sgn,exp}
12871	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12872	rts
12873
12874#
12875# for an extended precision DENORM, the UNFL exception bit is set
12876# the accrued bit is NOT set in this instance(no inexactness!)
12877#
12878fneg_denorm:
12879	andi.b		&0xc0,%d0		# is precision extended?
12880	bne.b		fneg_not_ext		# no; go handle sgl or dbl
12881
12882	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12883
12884	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy mantissa unchanged
12885	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12886	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
12887	eori.w		&0x8000,%d0		# negate sign
12888	bpl.b		fneg_denorm_done	# result is positive
12889	mov.b		&neg_bmask,FPSR_CC(%a6)	# result is negative; set 'N' ccode bit
12890fneg_denorm_done:
12891	mov.w		%d0,FP_SCR0_EX(%a6)	# store flipped {sgn,exp}
12892	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12893
12894	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12895	bne.b		fneg_ext_unfl_ena	# yes
12896	rts
12897
12898#
12899# the input is an extended DENORM and underflow is enabled in the FPCR.
12900# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12901# exponent and insert back into the operand.
12902#
12903fneg_ext_unfl_ena:
12904	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
12905	bsr.l		norm			# normalize result
12906	neg.w		%d0			# new exponent = -(shft val)
12907	addi.w		&0x6000,%d0		# add new bias to exponent
12908	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
12909	andi.w		&0x8000,%d1	 	# keep old sign
12910	andi.w		&0x7fff,%d0		# clear sign position
12911	or.w		%d1,%d0			# concat old sign, new exponent
12912	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
12913	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12914	rts
12915
12916#
12917# operand is either single or double
12918#
12919fneg_not_ext:
# d0.b holds only the precision bits here (masked with 0xc0 by the caller).
12920	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12921	bne.b		fneg_dbl
12922
12923#
12924# operand is to be rounded to single precision
12925#
12926fneg_sgl:
# Copy the operand to FP_SCR0, obtain the scale factor in d0 from
# scale_to_zero_src(), and classify: d0 >= 0x3fff-0x3f80 underflows,
# d0 == 0x3fff-0x407e may overflow, d0 below that overflows, and
# anything else falls through to the normal path.
12927	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12928	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12929	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12930	bsr.l		scale_to_zero_src	# calculate scale factor
12931
12932	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
12933	bge.w		fneg_sd_unfl		# yes; go handle underflow
12934	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
12935	beq.w		fneg_sd_may_ovfl	# maybe; go check
12936	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12937
12938#
12939# operand will NOT overflow or underflow when moved in to the fp reg file
12940#
12941fneg_sd_normal:
12942	fmov.l		&0x0,%fpsr		# clear FPSR
12943	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12944
12945	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
12946
12947	fmov.l		%fpsr,%d1		# save FPSR
12948	fmov.l		&0x0,%fpcr		# clear FPCR
12949
12950	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12951
12952fneg_sd_normal_exit:
# Undo the scaling: subtract the scale factor (d0) from the exponent of
# the result in fp0, keep its sign, and return the patched value in fp0.
12953	mov.l		%d2,-(%sp)		# save d2
12954	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12955	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12956	mov.w		%d1,%d2			# make a copy
12957	andi.l		&0x7fff,%d1		# strip sign
12958	sub.l		%d0,%d1			# exp -= scale factor (d0)
12959	andi.w		&0x8000,%d2		# keep old sign
12960	or.w		%d1,%d2			# concat old sign,new exp
12961	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12962	mov.l		(%sp)+,%d2		# restore d2
12963	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12964	rts
12965
12966#
12967# operand is to be rounded to double precision
12968#
12969fneg_dbl:
# Same classification as fneg_sgl, using the double-precision bounds
# 0x3fff-0x3c00 (underflow) and 0x3fff-0x43fe (possible overflow).
12970	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12971	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12972	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12973	bsr.l		scale_to_zero_src	# calculate scale factor
12974
12975	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
12976	bge.b		fneg_sd_unfl		# yes; go handle underflow
12977	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
12978	beq.w		fneg_sd_may_ovfl	# maybe; go check
12979	blt.w		fneg_sd_ovfl		# yes; go handle overflow
12980	bra.w		fneg_sd_normal		# no; go handle normalized op
12981
12982#
12983# operand WILL underflow when moved in to the fp register file
12984#
12985fneg_sd_unfl:
# The sign is flipped directly in the FP_SCR0 copy; no fneg instruction
# is executed on this path.
12986	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12987
12988	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
12989	bpl.b		fneg_sd_unfl_tst	# result is positive
12990	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
12991
12992# if underflow or inexact is enabled, go calculate EXOP first.
12993fneg_sd_unfl_tst:
12994	mov.b		FPCR_ENABLE(%a6),%d1
12995	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12996	bne.b		fneg_sd_unfl_ena	# yes
12997
12998fneg_sd_unfl_dis:
12999	lea		FP_SCR0(%a6),%a0	# pass: result addr
13000	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
13001	bsr.l		unf_res			# calculate default result
13002	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
13003	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13004	rts
13005
13006#
13007# operand will underflow AND underflow is enabled.
13008# therefore, we must return the result rounded to extended precision.
13009#
13010fneg_sd_unfl_ena:
# The EXOP is assembled in FP_SCR1 so that FP_SCR0 stays intact for
# unf_res(): same mantissa, exponent = exp - scale factor (d0) + 0x6000.
13011	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13012	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13013	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13014
13015	mov.l		%d2,-(%sp)		# save d2
13016	mov.l		%d1,%d2			# make a copy
13017	andi.l		&0x7fff,%d1		# strip sign
13018	andi.w		&0x8000,%d2		# keep old sign
13019	sub.l		%d0,%d1			# subtract scale factor
13020	addi.l		&0x6000,%d1		# add new bias
13021	andi.w		&0x7fff,%d1		# wrap exponent to 15 bits
13022	or.w		%d2,%d1			# concat new sign,new exp
13023	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13024	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13025	mov.l		(%sp)+,%d2		# restore d2
13026	bra.b		fneg_sd_unfl_dis	# now build the default result
13027
13028#
13029# operand WILL overflow.
13030#
13031fneg_sd_ovfl:
13032	fmov.l		&0x0,%fpsr		# clear FPSR
13033	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13034
13035	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13036
13037	fmov.l		&0x0,%fpcr		# clear FPCR
13038	fmov.l		%fpsr,%d1		# save FPSR
13039
13040	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13041
13042fneg_sd_ovfl_tst:
13043	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13044
13045	mov.b		FPCR_ENABLE(%a6),%d1
13046	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13047	bne.b		fneg_sd_ovfl_ena	# yes
13048
13049#
13050# OVFL is not enabled; therefore, we must create the default result by
13051# calling ovf_res().
13052#
13053fneg_sd_ovfl_dis:
13054	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13055	sne		%d1			# set sign param accordingly
13056	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13057	bsr.l		ovf_res			# calculate default result
13058	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13059	fmovm.x		(%a0),&0x80		# return default result in fp0
13060	rts
13061
13062#
13063# OVFL is enabled.
13064# the INEX2 bit has already been updated by the round to the correct precision.
13065# now, round to extended(and don't alter the FPSR).
13066#
13067fneg_sd_ovfl_ena:
# The EXOP is built from the operand copy in FP_SCR0:
# exponent = exp - scale factor (d0) - 0x6000, wrapped to 15 bits.
# NOTE(review): the fneg above reads FP_SCR0 without writing it back, so
# this EXOP appears to carry the un-negated source sign (fneg_sd_unfl, by
# contrast, flips the sign in FP_SCR0 itself) - confirm this is intended.
13068	mov.l		%d2,-(%sp)		# save d2
13069	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
13070	mov.l		%d1,%d2			# make a copy
13071	andi.l		&0x7fff,%d1		# strip sign
13072	andi.w		&0x8000,%d2		# keep old sign
13073	sub.l		%d0,%d1			# exp -= scale factor (d0)
13074	subi.l		&0x6000,%d1		# subtract bias
13075	andi.w		&0x7fff,%d1		# wrap exponent to 15 bits
13076	or.w		%d2,%d1			# concat sign,exp
13077	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13078	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13079	mov.l		(%sp)+,%d2		# restore d2
13080	bra.b		fneg_sd_ovfl_dis	# now build the default result
13081
13082#
13083# the move in MAY overflow. so...
13084#
13085fneg_sd_may_ovfl:
# Perform the negation under the caller's rounding precision/mode and
# inspect the result: |result| >= 2 is treated as an overflow, anything
# smaller takes the normal unscale-and-return exit.
13086	fmov.l		&0x0,%fpsr		# clear FPSR
13087	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13088
13089	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
13090
13091	fmov.l		%fpsr,%d1		# save status
13092	fmov.l		&0x0,%fpcr		# clear FPCR
13093
13094	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13095
13096	fabs.x		%fp0,%fp1		# make a copy of result
13097	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13098	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
13099
13100# no, it didn't overflow; we have correct result
13101	bra.w		fneg_sd_normal_exit
13102
13103##########################################################################
13104
13105#
13106# input is not normalized; what is it?
13107#
13108fneg_not_norm:
# d1 holds the source tag (STAG) loaded in fneg; d0 still holds the
# rounding info for the DENORM path.
13109	cmpi.b		%d1,&DENORM		# weed out DENORM
13110	beq.w		fneg_denorm
13111	cmpi.b		%d1,&SNAN		# weed out SNAN
13112	beq.l		res_snan_1op
13113	cmpi.b		%d1,&QNAN		# weed out QNAN
13114	beq.l		res_qnan_1op
13115
13116#
13117# do the fneg; at this point, only possible ops are ZERO and INF.
13118# use fneg to determine ccodes.
13119# prec:mode should be zero at this point but it won't affect answer anyways.
13120#
13121	fneg.x		SRC_EX(%a0),%fp0	# do fneg
13122	fmov.l		%fpsr,%d0		# fetch status written by the fneg
13123	rol.l		&0x8,%d0		# put ccodes in lo byte
13124	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
13125	rts
13126
13127#########################################################################
13128# XDEF ****************************************************************	#
13129# 	ftst(): emulates the ftst instruction				#
13130#									#
13131# XREF ****************************************************************	#
13132# 	res{s,q}nan_1op() - set NAN result for monadic instruction	#
13133#									#
13134# INPUT ***************************************************************	#
13135# 	a0 = pointer to extended precision source operand		#
13136#									#
13137# OUTPUT **************************************************************	#
13138#	none								#
13139#									#
13140# ALGORITHM ***********************************************************	#
13141# 	Check the source operand tag (STAG) and set the FPSR according	#
13142# to the operand type and sign.						#
13143#									#
13144#########################################################################
13145
13146	global		ftst
13147ftst:
# Entry: a0 = pointer to the source operand. No fp register is written;
# the only output is the ccode byte FPSR_CC(%a6).
# NOTE(review): positive NORM/DENORM operands return without touching
# FPSR_CC - presumably the caller has cleared it beforehand; confirm.
13148	mov.b		STAG(%a6),%d1
13149	bne.b		ftst_not_norm		# optimize on non-norm input
13150
13151#
13152# Norm:
13153#
13154ftst_norm:
13155	tst.b		SRC_EX(%a0)		# is operand negative?
13156	bmi.b		ftst_norm_m		# yes
13157	rts
13158ftst_norm_m:
13159	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13160	rts
13161
13162#
13163# input is not normalized; what is it?
13164#
13165ftst_not_norm:
13166	cmpi.b		%d1,&ZERO		# weed out ZERO
13167	beq.b		ftst_zero
13168	cmpi.b		%d1,&INF		# weed out INF
13169	beq.b		ftst_inf
13170	cmpi.b		%d1,&SNAN		# weed out SNAN
13171	beq.l		res_snan_1op
13172	cmpi.b		%d1,&QNAN		# weed out QNAN
13173	beq.l		res_qnan_1op
13174
13175#
13176# Denorm:
13177#
13178ftst_denorm:
13179	tst.b		SRC_EX(%a0)		# is operand negative?
13180	bmi.b		ftst_denorm_m		# yes
13181	rts
13182ftst_denorm_m:
13183	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
13184	rts
13185
13186#
13187# Infinity:
13188#
13189ftst_inf:
13190	tst.b		SRC_EX(%a0)		# is operand negative?
13191	bmi.b		ftst_inf_m		# yes
13192ftst_inf_p:
13193	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13194	rts
13195ftst_inf_m:
13196	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13197	rts
13198
13199#
13200# Zero:
13201#
13202ftst_zero:
13203	tst.b		SRC_EX(%a0)		# is operand negative?
13204	bmi.b		ftst_zero_m		# yes
13205ftst_zero_p:
13206	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13207	rts
13208ftst_zero_m:
13209	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
13210	rts
13211
13212#########################################################################
13213# XDEF ****************************************************************	#
13214#	fint(): emulates the fint instruction				#
13215#									#
13216# XREF ****************************************************************	#
13217#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13218#									#
13219# INPUT ***************************************************************	#
13220#	a0 = pointer to extended precision source operand		#
13221#	d0 = round precision/mode					#
13222#									#
13223# OUTPUT **************************************************************	#
13224#	fp0 = result							#
13225#									#
13226# ALGORITHM ***********************************************************	#
13227# 	Separate according to operand type. Unnorms don't pass through 	#
13228# here. For norms, load the rounding mode/prec, execute a "fint", then 	#
13229# store the resulting FPSR bits.					#
13230# 	For denorms, force the j-bit to a one and do the same as for	#
13231# norms. Denorms are so low that the answer will either be a zero or a 	#
13232# one.									#
13233# 	For zeroes/infs/NANs, return the same while setting the FPSR	#
13234# as appropriate.							#
13235#									#
13236#########################################################################
13237
13238	global		fint
13239fint:
# Entry: a0 = pointer to the source operand, d0 = round precision/mode.
# Result is returned in fp0.
13240	mov.b		STAG(%a6),%d1
13241	bne.b		fint_not_norm		# optimize on non-norm input
13242
13243#
13244# Norm:
13245#
13246fint_norm:
13247	andi.b		&0x30,%d0		# keep rnd mode only (prec = ext)
13248
13249	fmov.l		%d0,%fpcr		# set FPCR
13250	fmov.l		&0x0,%fpsr		# clear FPSR
13251
13252	fint.x 		SRC(%a0),%fp0		# execute fint
13253
13254	fmov.l		&0x0,%fpcr		# clear FPCR
13255	fmov.l		%fpsr,%d0		# save FPSR
13256	or.l		%d0,USER_FPSR(%a6)	# set exception bits
13257
13258	rts
13259
13260#
13261# input is not normalized; what is it?
13262#
13263fint_not_norm:
13264	cmpi.b		%d1,&ZERO		# weed out ZERO
13265	beq.b		fint_zero
13266	cmpi.b		%d1,&INF		# weed out INF
13267	beq.b		fint_inf
13268	cmpi.b		%d1,&DENORM		# weed out DENORM
13269	beq.b		fint_denorm
13270	cmpi.b		%d1,&SNAN		# weed out SNAN
13271	beq.l		res_snan_1op
13272	bra.l		res_qnan_1op		# weed out QNAN
13273
13274#
13275# Denorm:
13276#
13277# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13278# also, the INEX2 and AINEX exception bits will be set.
13279# so, we could either set these manually or force the DENORM
13280# to a very small NORM and ship it to the NORM routine.
13281# I do the latter.
13282#
13283fint_denorm:
# Only the exponent word and the top mantissa byte of FP_SCR0 are written
# here; the remaining mantissa bytes keep whatever FP_SCR0 already held.
13284	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13285	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
13286	lea		FP_SCR0(%a6),%a0	# operate on the forced copy
13287	bra.b		fint_norm
13288
13289#
13290# Zero:
13291#
13292fint_zero:
13293	tst.b		SRC_EX(%a0)		# is ZERO negative?
13294	bmi.b		fint_zero_m		# yes
13295fint_zero_p:
13296	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
13297	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13298	rts
13299fint_zero_m:
13300	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
13301	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13302	rts
13303
13304#
13305# Infinity:
13306#
13307fint_inf:
13308	fmovm.x		SRC(%a0),&0x80		# return result in fp0
13309	tst.b		SRC_EX(%a0)		# is INF negative?
13310	bmi.b		fint_inf_m		# yes
13311fint_inf_p:
13312	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13313	rts
13314fint_inf_m:
13315	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13316	rts
13317
13318#########################################################################
13319# XDEF ****************************************************************	#
13320#	fintrz(): emulates the fintrz instruction			#
13321#									#
13322# XREF ****************************************************************	#
13323#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13324#									#
13325# INPUT ***************************************************************	#
13326#	a0 = pointer to extended precision source operand		#
13327#	d0 = round precision/mode					#
13328#									#
13329# OUTPUT **************************************************************	#
13330# 	fp0 = result							#
13331#									#
13332# ALGORITHM ***********************************************************	#
13333#	Separate according to operand type. Unnorms don't pass through	#
13334# here. For norms, load the rounding mode/prec, execute a "fintrz", 	#
13335# then store the resulting FPSR bits.					#
13336# 	For denorms, force the j-bit to a one and do the same as for	#
13337# norms. Denorms are so low that the answer will either be a zero or a	#
13338# one.									#
13339# 	For zeroes/infs/NANs, return the same while setting the FPSR	#
13340# as appropriate.							#
13341#									#
13342#########################################################################
13343
13344	global		fintrz
13345fintrz:
# Entry: a0 = pointer to the source operand. Result is returned in fp0.
# Unlike fint, this path loads nothing into the FPCR before the operation.
13346	mov.b		STAG(%a6),%d1
13347	bne.b		fintrz_not_norm		# optimize on non-norm input
13348
13349#
13350# Norm:
13351#
13352fintrz_norm:
13353	fmov.l		&0x0,%fpsr		# clear FPSR
13354
13355	fintrz.x	SRC(%a0),%fp0		# execute fintrz
13356
13357	fmov.l		%fpsr,%d0		# save FPSR
13358	or.l		%d0,USER_FPSR(%a6)	# set exception bits
13359
13360	rts
13361
13362#
13363# input is not normalized; what is it?
13364#
13365fintrz_not_norm:
13366	cmpi.b		%d1,&ZERO		# weed out ZERO
13367	beq.b		fintrz_zero
13368	cmpi.b		%d1,&INF		# weed out INF
13369	beq.b		fintrz_inf
13370	cmpi.b		%d1,&DENORM		# weed out DENORM
13371	beq.b		fintrz_denorm
13372	cmpi.b		%d1,&SNAN		# weed out SNAN
13373	beq.l		res_snan_1op
13374	bra.l		res_qnan_1op		# weed out QNAN
13375
13376#
13377# Denorm:
13378#
13379# for DENORMs, the result will be (+/-)ZERO.
13380# also, the INEX2 and AINEX exception bits will be set.
13381# so, we could either set these manually or force the DENORM
13382# to a very small NORM and ship it to the NORM routine.
13383# I do the latter.
13384#
13385fintrz_denorm:
# Only the exponent word and the top mantissa byte of FP_SCR0 are written
# here; the remaining mantissa bytes keep whatever FP_SCR0 already held.
13386	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13387	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
13388	lea		FP_SCR0(%a6),%a0	# operate on the forced copy
13389	bra.b		fintrz_norm
13390
13391#
13392# Zero:
13393#
13394fintrz_zero:
13395	tst.b		SRC_EX(%a0)		# is ZERO negative?
13396	bmi.b		fintrz_zero_m		# yes
13397fintrz_zero_p:
13398	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
13399	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13400	rts
13401fintrz_zero_m:
13402	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
13403	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13404	rts
13405
13406#
13407# Infinity:
13408#
13409fintrz_inf:
13410	fmovm.x		SRC(%a0),&0x80		# return result in fp0
13411	tst.b		SRC_EX(%a0)		# is INF negative?
13412	bmi.b		fintrz_inf_m		# yes
13413fintrz_inf_p:
13414	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13415	rts
13416fintrz_inf_m:
13417	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13418	rts
13419
13420#########################################################################
13421# XDEF ****************************************************************	#
13422#	fabs():  emulates the fabs instruction				#
13423#	fsabs(): emulates the fsabs instruction				#
13424#	fdabs(): emulates the fdabs instruction				#
13425#									#
13426# XREF **************************************************************** #
13427#	norm() - normalize denorm mantissa to provide EXOP		#
13428#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
13429#	unf_res() - calculate underflow result				#
13430#	ovf_res() - calculate overflow result				#
13431#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
13432#									#
13433# INPUT *************************************************************** #
13434#	a0 = pointer to extended precision source operand		#
13435#	d0 = rnd precision/mode						#
13436#									#
13437# OUTPUT ************************************************************** #
13438#	fp0 = result							#
13439#	fp1 = EXOP (if exception occurred)				#
13440#									#
13441# ALGORITHM ***********************************************************	#
13442#	Handle NANs, infinities, and zeroes as special cases. Divide	#
13443# norms into extended, single, and double precision. 			#
13444# 	Simply clear sign for extended precision norm. Ext prec denorm	#
13445# gets an EXOP created for it since it's an underflow.			#
13446#	Double and single precision can overflow and underflow. First,	#
13447# scale the operand such that the exponent is zero. Perform an "fabs"	#
13448# using the correct rnd mode/prec. Check to see if the original 	#
13449# exponent would take an exception. If so, use unf_res() or ovf_res()	#
13450# to calculate the default result. Also, create the EXOP for the	#
13451# exceptional case. If no exception should occur, insert the correct 	#
13452# result exponent and return.						#
13453# 	Unnorms don't pass through here.				#
13454#									#
13455#########################################################################
13456
	global		fsabs
#
# fsabs: entry for the single-precision variant. The low byte of d0 is
# masked with 0x30 and s_mode*0x10 is OR'ed in, then control joins fabs.
#
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
#
# fdabs: entry for the double-precision variant. Same as fsabs but inserts
# d_mode*0x10, and falls straight through into fabs.
#
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fabs
#
# fabs: common entry. Saves the rnd prec/mode in L_SCR3 for the later
# FPCR loads, then dispatches on the source tag: a zero STAG falls through
# to fabs_norm, anything else is sorted out by fabs_not_norm (which
# expects the tag in d1).
#
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.w		fabs_not_norm		# optimize on non-norm input
13473
13474#
13475# ABSOLUTE VALUE: norms and denorms ONLY!
13476#
13477fabs_norm:
13478	andi.b		&0xc0,%d0		# is precision extended?
13479	bne.b		fabs_not_ext		# no; go handle sgl or dbl
13480
13481#
13482# precision selected is extended. so...we can not get an underflow
13483# or overflow because of rounding to the correct precision. so...
13484# skip the scaling and unscaling...
13485#
13486	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13487	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13488	mov.w		SRC_EX(%a0),%d1
13489	bclr		&15,%d1			# force absolute value
13490	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
13491	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
13492	rts
13493
13494#
13495# for an extended precision DENORM, the UNFL exception bit is set
13496# the accrued bit is NOT set in this instance(no inexactness!)
13497#
13498fabs_denorm:
13499	andi.b		&0xc0,%d0		# is precision extended?
13500	bne.b		fabs_not_ext		# no
13501
13502	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13503
13504	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13505	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13506	mov.w		SRC_EX(%a0),%d0
13507	bclr		&15,%d0			# clear sign
13508	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
13509
13510	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13511
13512	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13513	bne.b		fabs_ext_unfl_ena
13514	rts
13515
13516#
13517# the input is an extended DENORM and underflow is enabled in the FPCR.
13518# normalize the mantissa and add the bias of 0x6000 to the resulting negative
13519# exponent and insert back into the operand.
13520#
13521fabs_ext_unfl_ena:
13522	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
13523	bsr.l		norm			# normalize result
13524	neg.w		%d0			# new exponent = -(shft val)
13525	addi.w		&0x6000,%d0		# add new bias to exponent
13526	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
13527	andi.w		&0x8000,%d1		# keep old sign
13528	andi.w		&0x7fff,%d0		# clear sign position
13529	or.w		%d1,%d0			# concat old sign, new exponent
13530	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
13531	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13532	rts
13533
13534#
13535# operand is either single or double
13536#
13537fabs_not_ext:
13538	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
13539	bne.b		fabs_dbl
13540
13541#
13542# operand is to be rounded to single precision
13543#
13544fabs_sgl:
13545	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13546	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13547	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13548	bsr.l		scale_to_zero_src	# calculate scale factor
13549
13550	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
13551	bge.w		fabs_sd_unfl		# yes; go handle underflow
13552	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
13553	beq.w		fabs_sd_may_ovfl	# maybe; go check
13554	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13555
13556#
13557# operand will NOT overflow or underflow when moved in to the fp reg file
13558#
13559fabs_sd_normal:
13560	fmov.l		&0x0,%fpsr		# clear FPSR
13561	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13562
13563	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13564
13565	fmov.l		%fpsr,%d1		# save FPSR
13566	fmov.l		&0x0,%fpcr		# clear FPCR
13567
13568	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13569
13570fabs_sd_normal_exit:
13571	mov.l		%d2,-(%sp)		# save d2
13572	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
13573	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
13574	mov.l		%d1,%d2			# make a copy
13575	andi.l		&0x7fff,%d1		# strip sign
13576	sub.l		%d0,%d1			# add scale factor
13577	andi.w		&0x8000,%d2		# keep old sign
13578	or.w		%d1,%d2			# concat old sign,new exp
13579	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
13580	mov.l		(%sp)+,%d2		# restore d2
13581	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
13582	rts
13583
13584#
13585# operand is to be rounded to double precision
13586#
13587fabs_dbl:
13588	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
13589	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
13590	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
13591	bsr.l		scale_to_zero_src	# calculate scale factor
13592
13593	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
13594	bge.b		fabs_sd_unfl		# yes; go handle underflow
13595	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
13596	beq.w		fabs_sd_may_ovfl	# maybe; go check
13597	blt.w		fabs_sd_ovfl		# yes; go handle overflow
13598	bra.w		fabs_sd_normal		# no; ho handle normalized op
13599
13600#
13601# operand WILL underflow when moved in to the fp register file
13602#
13603fabs_sd_unfl:
13604	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13605
13606	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
13607
13608# if underflow or inexact is enabled, go calculate EXOP first.
13609	mov.b		FPCR_ENABLE(%a6),%d1
13610	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
13611	bne.b		fabs_sd_unfl_ena	# yes
13612
13613fabs_sd_unfl_dis:
13614	lea		FP_SCR0(%a6),%a0	# pass: result addr
13615	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
13616	bsr.l		unf_res			# calculate default result
13617	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
13618	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
13619	rts
13620
13621#
13622# operand will underflow AND underflow is enabled.
13623# therefore, we must return the result rounded to extended precision.
13624#
13625fabs_sd_unfl_ena:
13626	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13627	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13628	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
13629
13630	mov.l		%d2,-(%sp)		# save d2
13631	mov.l		%d1,%d2			# make a copy
13632	andi.l		&0x7fff,%d1		# strip sign
13633	andi.w		&0x8000,%d2		# keep old sign
13634	sub.l		%d0,%d1			# subtract scale factor
13635	addi.l		&0x6000,%d1		# add new bias
13636	andi.w		&0x7fff,%d1
13637	or.w		%d2,%d1			# concat new sign,new exp
13638	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
13639	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
13640	mov.l		(%sp)+,%d2		# restore d2
13641	bra.b		fabs_sd_unfl_dis
13642
13643#
13644# operand WILL overflow.
13645#
13646fabs_sd_ovfl:
13647	fmov.l		&0x0,%fpsr		# clear FPSR
13648	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13649
13650	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13651
13652	fmov.l		&0x0,%fpcr		# clear FPCR
13653	fmov.l		%fpsr,%d1		# save FPSR
13654
13655	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13656
13657fabs_sd_ovfl_tst:
13658	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13659
13660	mov.b		FPCR_ENABLE(%a6),%d1
13661	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
13662	bne.b		fabs_sd_ovfl_ena	# yes
13663
13664#
13665# OVFL is not enabled; therefore, we must create the default result by
13666# calling ovf_res().
13667#
13668fabs_sd_ovfl_dis:
13669	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
13670	sne		%d1			# set sign param accordingly
13671	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
13672	bsr.l		ovf_res			# calculate default result
13673	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
13674	fmovm.x		(%a0),&0x80		# return default result in fp0
13675	rts
13676
13677#
13678# OVFL is enabled.
13679# the INEX2 bit has already been updated by the round to the correct precision.
13680# now, round to extended(and don't alter the FPSR).
13681#
13682fabs_sd_ovfl_ena:
13683	mov.l		%d2,-(%sp)		# save d2
13684	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
13685	mov.l		%d1,%d2			# make a copy
13686	andi.l		&0x7fff,%d1		# strip sign
13687	andi.w		&0x8000,%d2		# keep old sign
13688	sub.l		%d0,%d1			# add scale factor
13689	subi.l		&0x6000,%d1		# subtract bias
13690	andi.w		&0x7fff,%d1
13691	or.w		%d2,%d1			# concat sign,exp
13692	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
13693	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
13694	mov.l		(%sp)+,%d2		# restore d2
13695	bra.b		fabs_sd_ovfl_dis
13696
13697#
13698# the move in MAY underflow. so...
13699#
13700fabs_sd_may_ovfl:
13701	fmov.l		&0x0,%fpsr		# clear FPSR
13702	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
13703
13704	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
13705
13706	fmov.l		%fpsr,%d1		# save status
13707	fmov.l		&0x0,%fpcr		# clear FPCR
13708
13709	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
13710
13711	fabs.x		%fp0,%fp1		# make a copy of result
13712	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
13713	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
13714
13715# no, it didn't overflow; we have correct result
13716	bra.w		fabs_sd_normal_exit
13717
13718##########################################################################
13719
13720#
13721# input is not normalized; what is it?
13722#
13723fabs_not_norm:
13724	cmpi.b		%d1,&DENORM		# weed out DENORM
13725	beq.w		fabs_denorm
13726	cmpi.b		%d1,&SNAN		# weed out SNAN
13727	beq.l		res_snan_1op
13728	cmpi.b		%d1,&QNAN		# weed out QNAN
13729	beq.l		res_qnan_1op
13730
13731	fabs.x		SRC(%a0),%fp0		# force absolute value
13732
13733	cmpi.b		%d1,&INF		# weed out INF
13734	beq.b		fabs_inf
13735fabs_zero:
13736	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
13737	rts
13738fabs_inf:
13739	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
13740	rts
13741
13742#########################################################################
13743# XDEF ****************************************************************	#
13744# 	fcmp(): fp compare op routine					#
13745#									#
13746# XREF ****************************************************************	#
13747# 	res_qnan() - return QNAN result					#
13748#	res_snan() - return SNAN result					#
13749#									#
13750# INPUT ***************************************************************	#
13751#	a0 = pointer to extended precision source operand		#
13752#	a1 = pointer to extended precision destination operand		#
13753#	d0 = round prec/mode						#
13754#									#
13755# OUTPUT ************************************************************** #
13756#	None								#
13757#									#
13758# ALGORITHM ***********************************************************	#
13759# 	Handle NANs and denorms as special cases. For everything else,	#
13760# just use the actual fcmp instruction to produce the correct condition	#
13761# codes.								#
13762#									#
13763#########################################################################
13764
	global		fcmp
#
# fcmp: build the dispatch index d1 = (DTAG << 3) | STAG as a zero-extended
# word. An index of zero falls through to fcmp_norm; every other
# combination is routed through tbl_fcmp_op by fcmp_not_norm.
#
fcmp:
	clr.w		%d1			# zero-extend the index to a word
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	lsl.b		&0x3,%d1		# dst tag selects a row of 8 entries
	or.b		STAG(%a6),%d1		# src tag selects the entry in the row
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
# Entry: a1 -> dst operand, a0 -> src operand. The dst is loaded into fp0
# and compared against the src with the hardware fcmp. The most significant
# byte of the resulting FPSR is then stored into FPSR_CC.
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x 		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts
13786
13787#
13788# fcmp: inputs are not both normalized; what are they?
13789#
13790fcmp_not_norm:
13791	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13792	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
13793
13794	swbeg		&48
13795tbl_fcmp_op:
13796	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
13797	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
13798	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
13799	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
13800	short		fcmp_nrm_dnrm 	- tbl_fcmp_op # NORM - DENORM
13801	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
13802	short		tbl_fcmp_op	- tbl_fcmp_op #
13803	short		tbl_fcmp_op	- tbl_fcmp_op #
13804
13805	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
13806	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
13807	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
13808	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
13809	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
13810	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
13811	short		tbl_fcmp_op	- tbl_fcmp_op #
13812	short		tbl_fcmp_op	- tbl_fcmp_op #
13813
13814	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
13815	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
13816	short		fcmp_norm	- tbl_fcmp_op # INF - INF
13817	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
13818	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
13819	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
13820	short		tbl_fcmp_op	- tbl_fcmp_op #
13821	short		tbl_fcmp_op	- tbl_fcmp_op #
13822
13823	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
13824	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
13825	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
13826	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
13827	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
13828	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
13829	short		tbl_fcmp_op	- tbl_fcmp_op #
13830	short		tbl_fcmp_op	- tbl_fcmp_op #
13831
13832	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
13833	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
13834	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
13835	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
13836	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
13837	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
13838	short		tbl_fcmp_op	- tbl_fcmp_op #
13839	short		tbl_fcmp_op	- tbl_fcmp_op #
13840
13841	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
13842	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
13843	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
13844	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
13845	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
13846	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
13847	short		tbl_fcmp_op	- tbl_fcmp_op #
13848	short		tbl_fcmp_op	- tbl_fcmp_op #
13849
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# Both stubs call the generic NAN handler and then mask FPSR_CC with 0xf7,
# i.e. clear bit 3 of the condition-code byte.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
13860
13861#
13862# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
13864# and use the fcmp_norm routine.
13865# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13866# and use the fcmp_norm routine.
13867# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13868# But with a DENORM and a NORM of the same sign, the neg bit is set if the
13869# (1) signs are (+) and the DENORM is the dst or
13870# (2) signs are (-) and the DENORM is the src
13871#
13872
#
# fcmp_dnrm_s: the source operand is a DENORM. Copy it to FP_SCR0 with
# bit 31 of the upper mantissa longword forced to one, point a0 at the
# copy and let fcmp_norm do the compare. a1 (dst) is left as it is.
#
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the adjusted src
	bra.w		fcmp_norm
13881
#
# fcmp_dnrm_d: the destination operand is a DENORM. Copy it to FP_SCR0
# with bit 31 of the upper mantissa longword forced to one, point a1 at
# the copy and let fcmp_norm do the compare. a0 (src) is left as it is.
#
# The sign/exponent field is a 16-bit word, so it is copied with mov.w
# exactly as fcmp_dnrm_s and fcmp_dnrm_sd do; a longword move would also
# drag the following pad word of the operand into the scratch area.
#
fcmp_dnrm_d:
	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1	# a1 = ptr to the adjusted dst
	bra.w		fcmp_norm
13890
#
# fcmp_dnrm_sd: both operands are DENORMs. The dst is copied to FP_SCR1
# and the src to FP_SCR0, each with bit 31 of the upper mantissa longword
# forced to one; a1/a0 are repointed at the copies and fcmp_norm does the
# compare. d0 is used as scratch.
#
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1	# a1 = ptr to the adjusted dst
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the adjusted src
	bra.w		fcmp_norm
13905
#
# fcmp_nrm_dnrm: dst is a NORM, src is a DENORM (table entry
# "NORM - DENORM"). The sign bytes are XOR'ed: with unlike signs the
# compare is done by fcmp_dnrm_s. With like signs the answer is decided
# here: FPSR_CC is overwritten with 'N' when the operands are negative and
# is left untouched when they are positive.
#
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
13919
#
# fcmp_dnrm_nrm: dst is a DENORM, src is a NORM (table entry
# "DENORM - NORM"). The sign bytes are XOR'ed: with unlike signs the
# compare is done by fcmp_dnrm_d. With like signs the answer is decided
# here: FPSR_CC is overwritten with 'N' when the operands are positive and
# is left untouched when they are negative.
#
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
13933
13934#########################################################################
13935# XDEF ****************************************************************	#
13936# 	fsglmul(): emulates the fsglmul instruction			#
13937#									#
13938# XREF ****************************************************************	#
13939#	scale_to_zero_src() - scale src exponent to zero		#
13940#	scale_to_zero_dst() - scale dst exponent to zero		#
13941#	unf_res4() - return default underflow result for sglop		#
13942#	ovf_res() - return default overflow result			#
13943# 	res_qnan() - return QNAN result					#
13944# 	res_snan() - return SNAN result					#
13945#									#
13946# INPUT ***************************************************************	#
13947#	a0 = pointer to extended precision source operand		#
13948#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
13950#									#
13951# OUTPUT **************************************************************	#
13952#	fp0 = result							#
13953#	fp1 = EXOP (if exception occurred)				#
13954#									#
13955# ALGORITHM ***********************************************************	#
13956#	Handle NANs, infinities, and zeroes as special cases. Divide	#
13957# norms/denorms into ext/sgl/dbl precision.				#
13958#	For norms/denorms, scale the exponents such that a multiply	#
13959# instruction won't cause an exception. Use the regular fsglmul to	#
13960# compute a result. Check if the regular operands would have taken	#
13961# an exception. If so, return the default overflow/underflow result	#
13962# and return the EXOP if exceptions are enabled. Else, scale the 	#
13963# result operand to the proper exponent.				#
13964#									#
13965#########################################################################
13966
	global		fsglmul
#
# fsglmul: save the rnd info in L_SCR3 and build the dispatch index
# d1 = (DTAG << 3) | STAG as a zero-extended word. Index zero falls
# through to fsglmul_norm; everything else goes through tbl_fsglmul_op.
#
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1			# zero-extend the index to a word
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	lsl.b		&0x3,%d1		# dst tag selects a row of 8 entries
	or.b		STAG(%a6),%d1		# src tag selects the entry in the row

	bne.w		fsglmul_not_norm	# optimize on non-norm input

#
# fsglmul_norm: dst is copied to FP_SCR1 and src to FP_SCR0, then both are
# scaled. The value each scale routine leaves in d0 is taken as its scale
# factor; the first one is parked on the stack and the sum of the two is
# compared against the overflow and underflow limits. If neither limit is
# hit, control falls through into fsglmul_normal with d0 = combined scale
# factor.
#
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe 	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001 	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow
14001
#
# fsglmul_normal: no overflow or underflow is possible. Multiply the
# scaled operands (dst from FP_SCR1 in fp0, src from FP_SCR0) under the
# saved rnd prec/mode and merge the resulting FPSR into USER_FPSR.
# Entry: d0 = combined scale factor.
#
fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# fsglmul_normal_exit: undo the scaling. fp0 is written to FP_SCR0, the
# scale factor in d0 is subtracted from its 15-bit exponent (sign bit is
# kept) and the operand is reloaded into fp0. d2 is preserved on the
# stack. fsglmul_may_ovfl and fsglmul_may_unfl also exit through here.
#
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
14028
#
# fsglmul_ovfl: the result WILL overflow. The multiply is still executed
# on the scaled operands so that the FPSR bits it produces can be merged
# into USER_FPSR and so that fp0 holds the scaled result for a possible
# EXOP. Entry: d0 = combined scale factor.
#
fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

#
# fsglmul_ovfl_dis: create the default overflow result. ovf_res() is
# passed d1 = sign flag (sne result) and d0 = rnd info with the low byte
# masked to 0x30; the byte it returns in d0 is OR'ed into FPSR_CC and the
# operand that a0 points to afterwards is loaded into fp0. d0 (the scale
# factor) is overwritten here.
#
fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
14060
#
# fsglmul_ovfl_ena: OVFL or INEX is enabled, so an EXOP is needed. The
# scaled result in fp0 is written to FP_SCR0, its exponent is replaced by
# (exp - scale factor - 0x6000) truncated to 15 bits with the sign kept,
# and the operand is loaded into fp1. d0 (scale factor) is only read; d2
# is preserved on the stack. fsglmul_ovfl_dis then creates the default
# result in fp0.
#
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# keep 15-bit exponent
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis
14077
#
# fsglmul_may_ovfl: the combined scale factor sits exactly on the overflow
# boundary, so the outcome depends on the product itself. Multiply the
# scaled operands and compare the magnitude of the result with 2: if it is
# >= 2 the result overflowed and fsglmul_ovfl_tst takes over, otherwise
# the result is unscaled by fsglmul_normal_exit. d0 (scale factor) is left
# untouched for either path.
#
fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit
14097
#
# fsglmul_unfl: the result WILL underflow. The UNFL exception bit is set
# and the scaled operands are multiplied with only rz_mode*0x10 loaded
# into the FPCR; the resulting FPSR is merged into USER_FPSR. fp0 then
# holds the scaled product that fsglmul_unfl_dis hands to unf_res4().
# Entry: d0 = combined scale factor.
#
fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

#
# fsglmul_unfl_dis: create the default underflow result. fp0 is written
# to FP_SCR0 (overwriting the scaled src) and unf_res4() is given
# a0 = &FP_SCR0 and d1 = rnd prec,mode; the byte it returns in d0 is OR'ed
# into FPSR_CC and the operand at FP_SCR0 is loaded into fp0.
# fsglmul_unfl_ena rejoins here after building the EXOP.
#
fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
14126
14127#
14128# UNFL is enabled.
14129#
14130fsglmul_unfl_ena:
14131	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14132
14133	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14134	fmov.l		&0x0,%fpsr		# clear FPSR
14135
14136	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
14137
14138	fmov.l		&0x0,%fpcr		# clear FPCR
14139
14140	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14141	mov.l		%d2,-(%sp)		# save d2
14142	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14143	mov.l		%d1,%d2			# make a copy
14144	andi.l		&0x7fff,%d1		# strip sign
14145	andi.w		&0x8000,%d2		# keep old sign
14146	sub.l		%d0,%d1			# add scale factor
14147	addi.l		&0x6000,%d1		# add bias
14148	andi.w		&0x7fff,%d1
14149	or.w		%d2,%d1			# concat old sign,new exp
14150	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14151	mov.l		(%sp)+,%d2		# restore d2
14152	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14153	bra.w		fsglmul_unfl_dis
14154
#
# fsglmul_may_unfl: the combined scale factor sits exactly on the
# underflow boundary. Multiply the scaled operands under the saved rnd
# prec/mode and compare the magnitude of the result with 2:
#	|result| > 2  - no underflow; unscale via fsglmul_normal_exit
#	|result| < 2  - underflow; handled by fsglmul_unfl
#	otherwise     - undecided; redo the multiply with RZ (see below)
# d0 (scale factor) is left untouched for every path.
#
fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
	fbgt.w		fsglmul_normal_exit	# greater; no underflow occurred
	fblt.w		fsglmul_unfl		# less; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
# the redo is done in fp1 so that fp0 keeps the result of the first
# multiply for fsglmul_normal_exit.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred
14196
14197##############################################################################
14198
14199#
14200# Single Precision Multiply: inputs are not both normalized; what are they?
14201#
14202fsglmul_not_norm:
14203	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14204	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
14205
14206	swbeg		&48
14207tbl_fsglmul_op:
14208	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
14209	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
14210	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
14211	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
14212	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
14213	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
14214	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14215	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14216
14217	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
14218	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
14219	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
14220	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
14221	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
14222	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
14223	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14224	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14225
14226	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
14227	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
14228	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
14229	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
14230	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
14231	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
14232	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14233	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14234
14235	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
14236	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
14237	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
14238	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
14239	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
14240	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
14241	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14242	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14243
14244	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
14245	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
14246	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
14247	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
14248	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
14249	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
14250	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14251	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14252
14253	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
14254	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
14255	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
14256	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
14257	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
14258	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
14259	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14260	short		tbl_fsglmul_op		- tbl_fsglmul_op #
14261
#
# Local landing pads for tbl_fsglmul_op. Each one is a single long branch
# to the shared handler, which lets the table keep 16-bit offsets relative
# to tbl_fsglmul_op. The ZERO and INF cases reuse the fmul_* handlers.
#
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst
14274
14275#########################################################################
14276# XDEF ****************************************************************	#
14277# 	fsgldiv(): emulates the fsgldiv instruction			#
14278#									#
14279# XREF ****************************************************************	#
14280#	scale_to_zero_src() - scale src exponent to zero		#
14281#	scale_to_zero_dst() - scale dst exponent to zero		#
14282#	unf_res4() - return default underflow result for sglop		#
14283#	ovf_res() - return default overflow result			#
14284# 	res_qnan() - return QNAN result					#
14285# 	res_snan() - return SNAN result					#
14286#									#
14287# INPUT ***************************************************************	#
14288#	a0 = pointer to extended precision source operand		#
14289#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
14291#									#
14292# OUTPUT **************************************************************	#
14293#	fp0 = result							#
14294#	fp1 = EXOP (if exception occurred)				#
14295#									#
14296# ALGORITHM ***********************************************************	#
14297#	Handle NANs, infinities, and zeroes as special cases. Divide	#
14298# norms/denorms into ext/sgl/dbl precision.				#
14299#	For norms/denorms, scale the exponents such that a divide	#
14300# instruction won't cause an exception. Use the regular fsgldiv to	#
14301# compute a result. Check if the regular operands would have taken	#
14302# an exception. If so, return the default overflow/underflow result	#
14303# and return the EXOP if exceptions are enabled. Else, scale the 	#
14304# result operand to the proper exponent.				#
14305#									#
14306#########################################################################
14307
#
# fsgldiv entry point.
#   in:  d0 = rnd prec,mode; a0 = src operand ptr; a1 = dst operand ptr
# The operand tags are combined into a dispatch index (DTAG*8 + STAG).
# A zero index (both tags zero, the NORM/NORM slot of tbl_fsgldiv_op)
# falls straight through to fsgldiv_norm; anything else is dispatched
# through the table by fsgldiv_not_norm.
#
14308	global		fsgldiv
14309fsgldiv:
14310	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14311
14312	clr.w		%d1			# zero the index word
14313	mov.b		DTAG(%a6),%d1		# d1 = dst tag
14314	lsl.b		&0x3,%d1		# dst tag * 8 (8 table slots per dst type)
14315	or.b		STAG(%a6),%d1		# combine src tags
14316
14317	bne.w		fsgldiv_not_norm	# optimize on non-norm input
14318
14319#
14320# DIVIDE: NORMs and DENORMs ONLY!
14321#
# Both operands are copied into the FP_SCR0 (src) / FP_SCR1 (dst) scratch
# areas and scaled by scale_to_zero_src / scale_to_zero_dst. The two
# returned factors are combined into d0, which stays live for the exponent
# fix-up done by the result paths. d0 is then range-checked to choose the
# may-overflow, may-underflow, underflow or (fall-through) normal path.
# NOTE(review): the scale_to_zero_* helpers are defined elsewhere;
# presumably they operate on the FP_SCR0/FP_SCR1 copies - confirm.
#
14322fsgldiv_norm:
14323	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst {sgn,exp}
14324	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst hi(man)
14325	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)	# copy dst lo(man)
14326
14327	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp}
14328	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src hi(man)
14329	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy src lo(man)
14330
14331	bsr.l		scale_to_zero_src	# calculate scale factor 1
14332	mov.l		%d0,-(%sp)		# save scale factor 1
14333
14334	bsr.l		scale_to_zero_dst	# calculate scale factor 2
14335
14336	neg.l		(%sp)			# (sp) = -(scale factor 1)
14337	add.l		%d0,(%sp)		# (sp) = scale factor 2 - scale factor 1
14338
14339	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
14340	lsr.b		&0x6,%d1		# d1 = rnd prec field; overwritten before use on every path below
14341	mov.l		(%sp)+,%d0		# d0 = combined scale factor
14342	cmpi.l		%d0,&0x3fff-0x7ffe	# scale factor <= 0x3fff-0x7ffe?
14343	ble.w		fsgldiv_may_ovfl	# yes; result may overflow
14344
14345	cmpi.l		%d0,&0x3fff-0x0000 	# will result underflow?
14346	beq.w		fsgldiv_may_unfl	# maybe
14347	bgt.w		fsgldiv_unfl		# yes; go handle underflow
#					# otherwise fall through to fsgldiv_normal
14348
#
# Normal case: divide the scaled operands under the user's rounding
# control, merge the resulting FPSR into USER_FPSR, then fix up the
# result exponent using the scale factor held in d0.
#
14349fsgldiv_normal:
14350	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14351
14352	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd info
14353	fmov.l		&0x0,%fpsr		# clear FPSR
14354
14355	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
14356
14357	fmov.l		%fpsr,%d1		# save FPSR
14358	fmov.l		&0x0,%fpcr		# clear FPCR
14359
14360	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14361
#
# Common exit, also reached from fsgldiv_may_ovfl and fsgldiv_may_unfl.
#   in:  fp0 = scaled result, d0 = scale factor
#   out: fp0 = result with exponent = (scaled exponent - d0), sign kept
# d2 is used as scratch and preserved across the sequence.
#
14362fsgldiv_normal_exit:
14363	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
14364	mov.l		%d2,-(%sp)		# save d2
14365	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
14366	mov.l		%d1,%d2			# make a copy
14367	andi.l		&0x7fff,%d1		# strip sign
14368	andi.w		&0x8000,%d2		# keep old sign
14369	sub.l		%d0,%d1			# exp = exp - scale factor
14370	or.w		%d2,%d1			# concat old sign,new exp
14371	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14372	mov.l		(%sp)+,%d2		# restore d2
14373	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
14374	rts
14375
#
# The scale factor says the result may overflow. Perform the divide on the
# scaled operands, then compute the unscaled exponent and compare it with
# 0x7fff. Below the limit the result is finished by fsgldiv_normal_exit;
# otherwise execution falls through into fsgldiv_ovfl_tst.
#
14376fsgldiv_may_ovfl:
14377	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14378
14379	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14380	fmov.l		&0x0,%fpsr		# clear FPSR
14381
14382	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
14383
14384	fmov.l		%fpsr,%d1		# fetch status
14385	fmov.l		&0x0,%fpcr		# clear FPCR
14386
14387	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14388
14389	fmovm.x		&0x01,-(%sp)		# save result to stack
14390	mov.w		(%sp),%d1		# fetch new exponent
14391	add.l		&0xc,%sp		# clear result
14392	andi.l		&0x7fff,%d1		# strip sign
14393	sub.l		%d0,%d1			# exp = exp - scale factor
14394	cmp.l		%d1,&0x7fff		# did divide overflow?
14395	blt.b		fsgldiv_normal_exit	# no; exponent is below 0x7fff
14396
#
# Overflow handling. Reached by fall-through from fsgldiv_may_ovfl with
# fp0 = scaled result and d0 = scale factor. Sets the overflow status
# bits, then either returns the default result directly (exceptions
# disabled) or first builds the EXOP in fp1 (OVFL or INEX enabled).
#
14397fsgldiv_ovfl_tst:
14398	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14399
14400	mov.b		FPCR_ENABLE(%a6),%d1
14401	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14402	bne.b		fsgldiv_ovfl_ena	# yes
14403
#
# Return the default overflow result. ovf_res is called with d1 = sign
# flag and d0 = rounding mode (precision bits cleared); as used here it
# hands back a condition-code byte in d0 and a pointer to the result in a0.
#
14404fsgldiv_ovfl_dis:
14405	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative
14406	sne		%d1			# set sign param accordingly
14407	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14408	andi.b		&0x30,%d0		# kill precision
14409	bsr.l		ovf_res			# calculate default result
14410	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
14411	fmovm.x		(%a0),&0x80		# return default result in fp0
14412	rts
14413
#
# OVFL or INEX enabled: EXOP = scaled result with exponent
# ((exp - scale factor) - 0x6000) masked to 15 bits, sign preserved.
# The EXOP is returned in fp1, then the default result is produced by
# fsgldiv_ovfl_dis. d2 is scratch and preserved.
#
14414fsgldiv_ovfl_ena:
14415	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
14416
14417	mov.l		%d2,-(%sp)		# save d2
14418	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14419	mov.l		%d1,%d2			# make a copy
14420	andi.l		&0x7fff,%d1		# strip sign
14421	andi.w		&0x8000,%d2		# keep old sign
14422	sub.l		%d0,%d1			# exp = exp - scale factor
14423	subi.l		&0x6000,%d1		# subtract new bias
14424	andi.w		&0x7fff,%d1		# clear ms bit
14425	or.w		%d2,%d1			# concat old sign,new exp
14426	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14427	mov.l		(%sp)+,%d2		# restore d2
14428	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14429	bra.b		fsgldiv_ovfl_dis	# now produce the default result
14430
#
# Underflow handling. Sets the UNFL exception bit, redoes the divide on
# the scaled operands with the FPCR forced to round-to-zero (precision
# bits zero), and merges the resulting status into USER_FPSR. If UNFL or
# INEX is enabled the EXOP is built first (fsgldiv_unfl_ena); either way
# the default underflow result is produced by fsgldiv_unfl_dis.
#   in: d0 = scale factor, FP_SCR0/FP_SCR1 = scaled src/dst operands
#
14431fsgldiv_unfl:
14432	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14433
14434	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14435
14436	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
14437	fmov.l		&0x0,%fpsr		# clear FPSR
14438
14439	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14440
14441	fmov.l		%fpsr,%d1		# save status
14442	fmov.l		&0x0,%fpcr		# clear FPCR
14443
14444	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14445
14446	mov.b		FPCR_ENABLE(%a6),%d1
14447	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14448	bne.b		fsgldiv_unfl_ena	# yes
14449
#
# Produce the default underflow result from the RZ result in fp0.
# unf_res4 is called with a0 = address of the stored result and
# d1 = rnd prec,mode; as used here it updates FP_SCR0 in place and
# returns condition-code bits in d0.
#
14450fsgldiv_unfl_dis:
14451	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14452
14453	lea		FP_SCR0(%a6),%a0	# pass: result addr
14454	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14455	bsr.l		unf_res4		# calculate default result
14456	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14457	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14458	rts
14459
14460#
14461# UNFL is enabled.
14462#
# Build the EXOP in fp1: redo the divide under the user's rounding control
# and give the result the exponent ((exp - scale factor) + 0x6000) masked
# to 15 bits, sign preserved. FP_SCR0 (the scaled src operand) is
# overwritten with the EXOP here; fsgldiv_unfl_dis then overwrites it
# again with the RZ result still held in fp0. d2 is scratch and preserved.
#
14463fsgldiv_unfl_ena:
14464	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14465
14466	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14467	fmov.l		&0x0,%fpsr		# clear FPSR
14468
14469	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14470
14471	fmov.l		&0x0,%fpcr		# clear FPCR
14472
14473	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14474	mov.l		%d2,-(%sp)		# save d2
14475	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14476	mov.l		%d1,%d2			# make a copy
14477	andi.l		&0x7fff,%d1		# strip sign
14478	andi.w		&0x8000,%d2		# keep old sign
14479	sub.l		%d0,%d1			# exp = exp - scale factor
14480	addi.l		&0x6000,%d1		# add bias
14481	andi.w		&0x7fff,%d1		# clear top bit
14482	or.w		%d2,%d1			# concat old sign, new exp
14483	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14484	mov.l		(%sp)+,%d2		# restore d2
14485	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14486	bra.b		fsgldiv_unfl_dis	# now produce the default result
14487
14488#
14489# the divide operation MAY underflow:
14490#
#
# Borderline underflow case (scale factor == 0x3fff). Divide the scaled
# operands under the user's rounding control and compare |result| with 1:
#   |result| > 1  -> no underflow; finish via fsgldiv_normal_exit
#   |result| < 1  -> underflow; go to fsgldiv_unfl
#   otherwise     -> undecided; repeat the divide with round-to-zero
# fp1 is used as scratch for the magnitude; fp0 keeps the first result,
# which is what fsgldiv_normal_exit returns.
#
14491fsgldiv_may_unfl:
14492	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14493
14494	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14495	fmov.l		&0x0,%fpsr		# clear FPSR
14496
14497	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
14498
14499	fmov.l		%fpsr,%d1		# save status
14500	fmov.l		&0x0,%fpcr		# clear FPCR
14501
14502	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
14503
14504	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 unchanged
14505	fcmp.b		%fp1,&0x1		# compare |result| with 1
14506	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
14507	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred
14508
14509#
14510# we still don't know if underflow occurred. result is ~ equal to 1. but,
14511# we don't know if the result was an underflow that rounded up to a 1
14512# or a normalized number that rounded down to a 1. so, redo the entire
14513# operation using RZ as the rounding mode to see what the pre-rounded
14514# result is. this case should be relatively rare.
14515#
14516	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
14517
14518	clr.l		%d1			# clear scratch register
14519	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
14520
14521	fmov.l		%d1,%fpcr		# set FPCR
14522	fmov.l		&0x0,%fpsr		# clear FPSR
14523
14524	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
14525
14526	fmov.l		&0x0,%fpcr		# clear FPCR
14527	fabs.x		%fp1			# make absolute value
14528	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
14529	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
14530	bra.w		fsgldiv_unfl		# yes; underflow occurred
14531
14532############################################################################
14533
14534#
14535# Divide: inputs are not both normalized; what are they?
14536#
#
# Dispatch on the combined operand tags.
#   in: d1.w = DTAG*8 + STAG (built at the fsgldiv entry point)
# The table holds 16-bit offsets relative to tbl_fsgldiv_op: the first
# instruction fetches the offset for this tag pair, the second jumps to
# table base + offset. Rows are indexed by the destination type, columns
# by the source type; each row comment reads "dst / src". The last two
# slots of every row hold offset 0.
# NOTE(review): presumably tag values 6 and 7 never occur, since offset 0
# would jump into the table itself - confirm against the tag definitions.
#
14537fsgldiv_not_norm:
14538	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# d1 = handler offset
14539	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)		# jump to handler
14540
14541	swbeg		&48
14542tbl_fsgldiv_op:
14543	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
14544	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
14545	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
14546	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
14547	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
14548	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
14549	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14550	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14551
14552	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
14553	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
14554	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
14555	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
14556	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
14557	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
14558	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14559	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14560
14561	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
14562	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
14563	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
14564	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
14565	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
14566	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
14567	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14568	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14569
14570	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
14571	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
14572	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
14573	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
14574	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
14575	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
14576	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14577	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14578
14579	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
14580	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
14581	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
14582	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
14583	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
14584	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
14585	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14586	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14587
14588	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
14589	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
14590	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
14591	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
14592	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
14593	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
14594	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14595	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused slot)
14596
#
# Trampolines for the fsgldiv special cases. tbl_fsgldiv_op can only hold
# 16-bit offsets, so each table target is a local stub that forwards with
# a long branch to a handler defined outside this section.
#
14597fsgldiv_res_qnan:
14598	bra.l		res_qnan		# forward to res_qnan
14599fsgldiv_res_snan:
14600	bra.l		res_snan		# forward to res_snan
14601fsgldiv_res_operr:
14602	bra.l		res_operr		# forward to res_operr
14603fsgldiv_inf_load:
14604	bra.l		fdiv_inf_load		# forward to fdiv_inf_load
14605fsgldiv_zero_load:
14606	bra.l		fdiv_zero_load		# forward to fdiv_zero_load
14607fsgldiv_inf_dst:
14608	bra.l		fdiv_inf_dst		# forward to fdiv_inf_dst
14609
14610#########################################################################
14611# XDEF ****************************************************************	#
14612#	fadd(): emulates the fadd instruction				#
14613#	fsadd(): emulates the fsadd instruction				#
14614#	fdadd(): emulates the fdadd instruction				#
14615#									#
14616# XREF ****************************************************************	#
14617# 	addsub_scaler2() - scale the operands so they won't take exc	#
14618#	ovf_res() - return default overflow result			#
14619#	unf_res() - return default underflow result			#
14620#	res_qnan() - set QNAN result					#
14621# 	res_snan() - set SNAN result					#
14622#	res_operr() - set OPERR result					#
14623#	scale_to_zero_src() - set src operand exponent equal to zero	#
14624#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
14625#									#
14626# INPUT ***************************************************************	#
14627#	a0 = pointer to extended precision source operand		#
14628# 	a1 = pointer to extended precision destination operand		#
14629#									#
14630# OUTPUT **************************************************************	#
14631#	fp0 = result							#
14632#	fp1 = EXOP (if exception occurred)				#
14633#									#
14634# ALGORITHM ***********************************************************	#
14635# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
14636# norms into extended, single, and double precision.			#
14637#	Do addition after scaling exponents such that exception won't	#
14638# occur. Then, check result exponent to see if exception would have	#
14639# occurred. If so, return default result and maybe EXOP. Else, insert	#
14640# the correct result exponent and return. Set FPSR bits as appropriate.	#
14641#									#
14642#########################################################################
14643
#
# Entry points for the add emulation.
#   in: d0 = rnd prec,mode; a0 = src operand ptr; a1 = dst operand ptr
# fsadd and fdadd keep only the rounding-mode bits of d0 (mask 0x30) and
# insert single resp. double precision before joining fadd; fsadd
# branches, fdadd falls through. fadd uses d0 as given.
#
14644	global		fsadd
14645fsadd:
14646	andi.b		&0x30,%d0		# clear rnd prec
14647	ori.b		&s_mode*0x10,%d0	# insert sgl prec
14648	bra.b		fadd
14649
14650	global		fdadd
14651fdadd:
14652	andi.b		&0x30,%d0		# clear rnd prec
14653	ori.b		&d_mode*0x10,%d0	# insert dbl prec
14654
#
# Common entry: save the rounding info and build the dispatch index
# DTAG*8 + STAG. A zero index (NORM + NORM slot of tbl_fadd_op) falls
# through to fadd_norm; anything else goes through fadd_not_norm.
#
14655	global		fadd
14656fadd:
14657	mov.l		%d0,L_SCR3(%a6)		# store rnd info
14658
14659	clr.w		%d1			# zero the index word
14660	mov.b		DTAG(%a6),%d1		# d1 = dst tag
14661	lsl.b		&0x3,%d1		# dst tag * 8 (8 table slots per dst type)
14662	or.b		STAG(%a6),%d1		# combine src tags
14663
14664	bne.w		fadd_not_norm		# optimize on non-norm input
14665
14666#
14667# ADD: norms and denorms
14668#
#
# Main add path. addsub_scaler2 (defined elsewhere) scales the operands;
# as used here it leaves the scaled operands in FP_SCR0/FP_SCR1 and the
# scale factor in d0. fadd_zero_entry is also entered from fadd_zero_dst
# and fadd_zero_src, which prepare FP_SCR0/FP_SCR1 and d0 themselves.
#
14669fadd_norm:
14670	bsr.l		addsub_scaler2		# scale exponents
14671
14672fadd_zero_entry:
14673	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14674
14675	fmov.l		&0x0,%fpsr		# clear FPSR
14676	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14677
14678	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14679
14680	fmov.l		&0x0,%fpcr		# clear FPCR
14681	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
14682
14683	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
14684
14685	fbeq.w		fadd_zero_exit		# if result is zero, end now
14686
# From here until the matching restore, the stack holds the saved d2
# followed by the 12-byte result; every exit path must pop both.
14687	mov.l		%d2,-(%sp)		# save d2
14688
14689	fmovm.x		&0x01,-(%sp)		# save result to stack
14690
14691	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
14692	lsr.b		&0x6,%d1		# d1 = rnd prec field = table index
14693
14694	mov.w		(%sp),%d2		# fetch new sign, exp
14695	andi.l		&0x7fff,%d2		# strip sign
14696	sub.l		%d0,%d2			# exp = exp - scale factor
14697
14698	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14699	bge.b		fadd_ovfl		# yes
14700
14701	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14702	blt.w		fadd_unfl		# yes
14703	beq.w		fadd_may_unfl		# maybe; go find out
14704
#
# In-range result: insert the unscaled exponent (d2) under the original
# sign, reload the result into fp0 and restore d2.
#
14705fadd_normal:
14706	mov.w		(%sp),%d1		# fetch {sgn,exp} of result
14707	andi.w		&0x8000,%d1		# keep sign
14708	or.w		%d2,%d1			# concat sign,new exp
14709	mov.w		%d1,(%sp)		# insert new exponent
14710
14711	fmovm.x		(%sp)+,&0x80		# return result in fp0
14712
14713	mov.l		(%sp)+,%d2		# restore d2
14714	rts
14715
# Zero result: fp0 already holds the zero produced by the fadd above and
# the condition codes were merged into USER_FPSR, so just return.
14716fadd_zero_exit:
14717#	fmov.s		&0x00000000,%fp0	# return zero in fp0
14718	rts
14719
#
# Exponent limits used by the fadd range checks, indexed by the rounding
# precision field (order: ext, sgl, dbl). A result whose unscaled exponent
# is >= the tbl_fadd_ovfl entry is treated as an overflow; one below the
# tbl_fadd_unfl entry as an underflow, and one equal to it as a possible
# underflow. 0x407f = 0x3fff+0x80, 0x43ff = 0x3fff+0x400,
# 0x3f81 = 0x3fff-0x7e, 0x3c01 = 0x3fff-0x3fe.
#
14720tbl_fadd_ovfl:
14721	long		0x7fff			# ext ovfl
14722	long		0x407f			# sgl ovfl
14723	long		0x43ff			# dbl ovfl
14724
14725tbl_fadd_unfl:
14726	long	        0x0000			# ext unfl
14727	long		0x3f81			# sgl unfl
14728	long		0x3c01			# dbl unfl
14729
#
# Overflow handling for fadd.
#   in: stack = 12-byte result on top of the saved d2; d2 = unscaled
#       exponent; d0 = scale factor
# Sets the overflow status bits. With OVFL and INEX both disabled, the
# stacked result is discarded and the default result is returned; otherwise
# fadd_ovfl_ena builds the EXOP first (and pops the result itself).
#
14730fadd_ovfl:
14731	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14732
14733	mov.b		FPCR_ENABLE(%a6),%d1
14734	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
14735	bne.b		fadd_ovfl_ena		# yes
14736
14737	add.l		&0xc,%sp		# discard stacked result
#
# Return the default overflow result. ovf_res is called with d1 = sign
# flag and d0 = prec:rnd; as used here it hands back condition-code bits
# in d0 and a pointer to the result in a0. Only the saved d2 remains on
# the stack at this point.
#
14738fadd_ovfl_dis:
14739	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
14740	sne		%d1			# set sign param accordingly
14741	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
14742	bsr.l		ovf_res			# calculate default result
14743	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
14744	fmovm.x		(%a0),&0x80		# return default result in fp0
14745	mov.l		(%sp)+,%d2		# restore d2
14746	rts
14747
#
# OVFL or INEX is enabled: build the EXOP in fp1, then let fadd_ovfl_dis
# produce the default result.
#   in: stack = 12-byte result on top of the saved d2; d2 = unscaled
#       exponent of that result; d0 = scale factor
#
# The rnd prec,mode byte is the least significant byte of the longword
# stored at L_SCR3, i.e. it lives at 3+L_SCR3. The precision test
# previously read the byte at L_SCR3+0 (bits 31-24 of the stored value),
# so it never examined the precision field and fadd_ovfl_ena_sd could not
# be selected by it. It now reads 3+L_SCR3.
#
14748fadd_ovfl_ena:
14749	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
14750	andi.b		&0xc0,%d1		# is precision extended?
14751	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
14752
#
# EXOP = stacked result with exponent (d2 - 0x6000) masked to 15 bits,
# sign preserved. Popping it into fp1 leaves only the saved d2 on the
# stack, as fadd_ovfl_dis expects.
#
14753fadd_ovfl_ena_cont:
14754	mov.w		(%sp),%d1		# fetch {sgn,exp}
14755	andi.w		&0x8000,%d1		# keep sign
14756	subi.l		&0x6000,%d2		# subtract extra bias
14757	andi.w		&0x7fff,%d2		# clear top bit
14758	or.w		%d2,%d1			# concat sign,new exp
14759	mov.w		%d1,(%sp)		# insert new exponent
14760
14761	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
14762	bra.b		fadd_ovfl_dis
14763
#
# Single/double precision: redo the add with only the rounding mode in the
# FPCR (precision bits zero) and replace the stacked result with the new
# one. Because this result may differ from the one d2 was derived from,
# the unscaled exponent is recomputed from it (d0 still holds the scale
# factor) before joining fadd_ovfl_ena_cont.
#
14764fadd_ovfl_ena_sd:
14765	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14766
14767	mov.l		L_SCR3(%a6),%d1
14768	andi.b		&0x30,%d1		# keep rnd mode
14769	fmov.l		%d1,%fpcr		# set FPCR
14770
14771	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14772
14773	fmov.l		&0x0,%fpcr		# clear FPCR
14774
14775	add.l		&0xc,%sp		# discard the old stacked result
14776	fmovm.x		&0x01,-(%sp)		# stack the recomputed result
	mov.w		(%sp),%d2		# fetch {sgn,exp} of new result
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# exp = exp - scale factor
14777	bra.b		fadd_ovfl_ena_cont
14778
#
# Underflow handling for fadd.
#   in: stack = 12-byte result on top of the saved d2; d0 = scale factor
# Sets the UNFL exception bit, discards the stacked result, and redoes the
# add on the scaled operands with the FPCR forced to round-to-zero
# (precision bits zero). If UNFL or INEX is enabled the EXOP is built by
# fadd_unfl_ena first; either way the default underflow result is
# produced by fadd_unfl_dis.
#
14779fadd_unfl:
14780	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14781
14782	add.l		&0xc,%sp		# discard stacked result
14783
14784	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
14785
14786	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
14787	fmov.l		&0x0,%fpsr		# clear FPSR
14788
14789	fadd.x		FP_SCR0(%a6),%fp0	# execute add
14790
14791	fmov.l		&0x0,%fpcr		# clear FPCR
14792	fmov.l		%fpsr,%d1		# save status
14793
14794	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
14795
14796	mov.b		FPCR_ENABLE(%a6),%d1
14797	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
14798	bne.b		fadd_unfl_ena		# yes
14799
#
# Produce the default underflow result from the RZ result in fp0.
# unf_res is called with a0 = address of the stored result and
# d1 = rnd prec,mode; as used here it updates FP_SCR0 in place and
# returns condition-code bits in d0. The saved d2 is popped on the way out.
#
14800fadd_unfl_dis:
14801	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
14802
14803	lea		FP_SCR0(%a6),%a0	# pass: result addr
14804	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
14805	bsr.l		unf_res			# calculate default result
14806	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
14807	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
14808	mov.l		(%sp)+,%d2		# restore d2
14809	rts
14810
#
# UNFL or INEX is enabled: build the EXOP in fp1.
# The add is redone into fp1. For extended precision the FPCR is loaded
# with the full saved rnd prec,mode; for single/double only the rounding
# mode is kept (fadd_unfl_ena_sd). The EXOP exponent is
# ((exp - scale factor) + 0x6000) masked to 15 bits, sign preserved.
# d2 is used as scratch here without a local save: the caller's d2 is
# still on the stack and is restored by fadd_unfl_dis. FP_SCR0 (the scaled
# src operand) is overwritten with the EXOP; fadd_unfl_dis then
# overwrites it again with the RZ result still held in fp0.
#
14811fadd_unfl_ena:
14812	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
14813
14814	mov.l		L_SCR3(%a6),%d1
14815	andi.b		&0xc0,%d1		# is precision extended?
14816	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
14817
14818	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
14819
14820fadd_unfl_ena_cont:
14821	fmov.l		&0x0,%fpsr		# clear FPSR
14822
14823	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14824
14825	fmov.l		&0x0,%fpcr		# clear FPCR
14826
14827	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
14828	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
14829	mov.l		%d1,%d2			# make a copy
14830	andi.l		&0x7fff,%d1		# strip sign
14831	andi.w		&0x8000,%d2		# keep old sign
14832	sub.l		%d0,%d1			# exp = exp - scale factor
14833	addi.l		&0x6000,%d1		# add new bias
14834	andi.w		&0x7fff,%d1		# clear top bit
14835	or.w		%d2,%d1			# concat sign,new exp
14836	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
14837	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
14838	bra.w		fadd_unfl_dis		# now produce the default result
14839
14840fadd_unfl_ena_sd:
14841	mov.l		L_SCR3(%a6),%d1
14842	andi.b		&0x30,%d1		# use only rnd mode
14843	fmov.l		%d1,%fpcr		# set FPCR
14844
14845	bra.b		fadd_unfl_ena_cont
14846
14847#
14848# result is equal to the smallest normalized number in the selected precision.
14849# if the precision is extended, this result could not have come from an
14850# underflow that rounded up.
14851#
#
# Borderline underflow case: the unscaled exponent equals the tbl_fadd_unfl
# entry for the selected precision.
#   in: stack = 12-byte result on top of the saved d2; fp0 = that result;
#       d2 = unscaled exponent; d0 = scale factor
# The result is accepted as normal unless the precision is sgl/dbl, the
# mantissa is exactly 0x80000000_00000000 and INEX2 is set. In that one
# case the add is redone with round-to-zero and the magnitudes compared.
# fp0 is overwritten with its absolute value below; this is harmless
# because fadd_normal reloads fp0 from the stack and fadd_unfl recomputes it.
#
14852fadd_may_unfl:
14853	mov.l		L_SCR3(%a6),%d1
14854	andi.b		&0xc0,%d1		# is precision extended?
14855	beq.w		fadd_normal		# yes; no underflow occurred
14856
14857	mov.l		0x4(%sp),%d1		# extract hi(man)
14858	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
14859	bne.w		fadd_normal		# no; no underflow occurred
14860
14861	tst.l		0x8(%sp)		# is lo(man) = 0x0?
14862	bne.w		fadd_normal		# no; no underflow occurred
14863
14864	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14865	beq.w		fadd_normal		# no; no underflow occurred
14866
14867#
14868# ok, so now the result has an exponent equal to the smallest normalized
14869# exponent for the selected precision. also, the mantissa is equal to
14870# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14871# g,r,s.
14872# now, we must determine whether the pre-rounded result was an underflow
14873# rounded "up" or a normalized number rounded "down".
14874# so, we do this by re-executing the add using RZ as the rounding mode and
14875# seeing if the new result is smaller or equal to the current result.
14876#
14877	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
14878
14879	mov.l		L_SCR3(%a6),%d1
14880	andi.b		&0xc0,%d1		# keep rnd prec
14881	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
14882	fmov.l		%d1,%fpcr		# set FPCR
14883	fmov.l		&0x0,%fpsr		# clear FPSR
14884
14885	fadd.x		FP_SCR0(%a6),%fp1	# execute add
14886
14887	fmov.l		&0x0,%fpcr		# clear FPCR
14888
14889	fabs.x		%fp0			# compare absolute values
14890	fabs.x		%fp1
14891	fcmp.x		%fp0,%fp1		# is first result > second?
14892
14893	fbgt.w		fadd_unfl		# yes; it's an underflow
14894	bra.w		fadd_normal		# no; it's not an underflow
14895
14896##########################################################################
14897
14898#
14899# Add: inputs are not both normalized; what are they?
14900#
#
# Dispatch on the combined operand tags.
#   in: d1.w = DTAG*8 + STAG (built at the fadd entry point)
# The table holds 16-bit offsets relative to tbl_fadd_op: the first
# instruction fetches the offset for this tag pair, the second jumps to
# table base + offset. Rows are indexed by the destination type, columns
# by the source type; each row comment reads "dst + src". The last two
# slots of every row hold offset 0.
# NOTE(review): presumably tag values 6 and 7 never occur, since offset 0
# would jump into the table itself - confirm against the tag definitions.
#
14901fadd_not_norm:
14902	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# d1 = handler offset
14903	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# jump to handler
14904
14905	swbeg		&48
14906tbl_fadd_op:
14907	short		fadd_norm	- tbl_fadd_op # NORM + NORM
14908	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
14909	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
14910	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
14911	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
14912	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
14913	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14914	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14915
14916	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
14917	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
14918	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
14919	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
14920	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
14921	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
14922	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14923	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14924
14925	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
14926	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
14927	short		fadd_inf_2	- tbl_fadd_op # INF + INF
14928	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
14929	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
14930	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
14931	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14932	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14933
14934	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
14935	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
14936	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
14937	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
14938	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
14939	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
14940	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14941	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14942
14943	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
14944	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
14945	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
14946	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
14947	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
14948	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
14949	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14950	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14951
14952	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
14953	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
14954	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
14955	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
14956	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
14957	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
14958	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14959	short		tbl_fadd_op	- tbl_fadd_op # (unused slot)
14960
#
# NaN trampolines for tbl_fadd_op: the table holds only 16-bit offsets, so
# these stubs forward with a long branch to the shared handlers.
#
14961fadd_res_qnan:
14962	bra.l		res_qnan		# forward to res_qnan
14963fadd_res_snan:
14964	bra.l		res_snan		# forward to res_snan
14965
14966#
14967# both operands are ZEROes
14968#
#
# ZERO + ZERO.
#   in:  a0 = src operand ptr, a1 = dst operand ptr
#   out: fp0 = +ZERO or -ZERO; FPSR_CC is overwritten with Z or NEG+Z
# The sign bits are read from the first byte of each operand's
# {sgn,exp} word and XORed to see whether they differ.
#
14969fadd_zero_2:
14970	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
14971	mov.b		DST_EX(%a1),%d1
14972	eor.b		%d0,%d1			# bit 7 set if signs differ
14973	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
14974
14975# the signs are the same. so determine whether they are positive or negative
14976# and return the appropriately signed zero.
14977	tst.b		%d0			# are ZEROes positive or negative?
14978	bmi.b		fadd_zero_rm		# negative
14979	fmov.s		&0x00000000,%fp0	# return +ZERO
14980	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14981	rts
14982
14983#
14984# the ZEROes have opposite signs:
14985# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14986# - -ZERO is returned in the case of RM.
14987#
14988fadd_zero_2_chk_rm:
14989	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
14990	andi.b		&0x30,%d1		# extract rnd mode
14991	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
14992	beq.b		fadd_zero_rm		# yes
14993	fmov.s		&0x00000000,%fp0	# return +ZERO
14994	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
14995	rts
14996
# Shared exit for a negative zero result: both zeroes negative, or
# opposite signs with the rounding mode RM.
14997fadd_zero_rm:
14998	fmov.s		&0x80000000,%fp0	# return -ZERO
14999	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
15000	rts
15001
15002#
15003# one operand is a ZERO and the other is a DENORM or NORM. scale
15004# the DENORM or NORM and jump to the regular fadd routine.
15005#
#
# fadd_zero_dst: the destination is ZERO (table rows "ZERO + NORM" and
# "ZERO + DENORM"). Copy the source into FP_SCR0, scale it, clear FP_SCR1
# to stand for the zero destination, and join the regular add at
# fadd_zero_entry.
# NOTE(review): FP_SCR1 is cleared to +0 regardless of the zero's sign -
# confirm that the sign of a zero operand cannot matter on this path.
#
15006fadd_zero_dst:
15007	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp}
15008	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src hi(man)
15009	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy src lo(man)
15010	bsr.l		scale_to_zero_src	# scale the operand
15011	clr.w		FP_SCR1_EX(%a6)		# dst = zero
15012	clr.l		FP_SCR1_HI(%a6)
15013	clr.l		FP_SCR1_LO(%a6)
15014	bra.w		fadd_zero_entry		# go execute fadd
15015
#
# fadd_zero_src: the source is ZERO (table rows "NORM + ZERO" and
# "DENORM + ZERO"). Mirror image of fadd_zero_dst: copy and scale the
# destination in FP_SCR1 and clear FP_SCR0.
#
15016fadd_zero_src:
15017	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst {sgn,exp}
15018	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst hi(man)
15019	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)	# copy dst lo(man)
15020	bsr.l		scale_to_zero_dst	# scale the operand
15021	clr.w		FP_SCR0_EX(%a6)		# src = zero
15022	clr.l		FP_SCR0_HI(%a6)
15023	clr.l		FP_SCR0_LO(%a6)
15024	bra.w		fadd_zero_entry		# go execute fadd
15025
15026#
15027# both operands are INFs. an OPERR will result if the INFs have
15028# different signs. else, an INF of the same sign is returned
15029#
#
# INF + INF: opposite signs go to res_operr; otherwise execution falls
# through into fadd_inf_src so that the source INF is returned.
#   in: a0 = src operand ptr, a1 = dst operand ptr
#
15030fadd_inf_2:
15031	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15032	mov.b		DST_EX(%a1),%d1
15033	eor.b		%d1,%d0			# bit 7 set if signs differ
15034	bmi.l		res_operr		# weed out (-INF)+(+INF)
15035
15036# ok, so it's not an OPERR. but, we do have to remember to return the
15037# src INF since that's where the 881/882 gets the j-bit from...
15038
15039#
15040# operands are INF and one of {ZERO, INF, DENORM, NORM}
15041#
# The source is the INF: return it in fp0 and overwrite FPSR_CC with INF
# (via fadd_inf_done) or NEG+INF according to its sign.
15042fadd_inf_src:
15043	fmovm.x		SRC(%a0),&0x80		# return src INF
15044	tst.b		SRC_EX(%a0)		# is INF positive?
15045	bpl.b		fadd_inf_done		# yes; we're done
15046	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15047	rts
15048
15049#
15050# operands are INF and one of {ZERO, INF, DENORM, NORM}
15051#
# The destination is the INF: return it in fp0 and overwrite FPSR_CC with
# INF (via fadd_inf_done) or NEG+INF according to its sign.
15052fadd_inf_dst:
15053	fmovm.x		DST(%a1),&0x80		# return dst INF
15054	tst.b		DST_EX(%a1)		# is INF positive?
15055	bpl.b		fadd_inf_done		# yes; we're done
15056	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15057	rts
15058
# Shared exit for a positive INF result.
15059fadd_inf_done:
15060	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
15061	rts
15062
15063#########################################################################
15064# XDEF ****************************************************************	#
15065#	fsub(): emulates the fsub instruction				#
15066#	fssub(): emulates the fssub instruction				#
15067#	fdsub(): emulates the fdsub instruction				#
15068#									#
15069# XREF ****************************************************************	#
15070# 	addsub_scaler2() - scale the operands so they won't take exc	#
15071#	ovf_res() - return default overflow result			#
15072#	unf_res() - return default underflow result			#
15073#	res_qnan() - set QNAN result					#
15074# 	res_snan() - set SNAN result					#
15075#	res_operr() - set OPERR result					#
15076#	scale_to_zero_src() - set src operand exponent equal to zero	#
15077#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
15078#									#
15079# INPUT ***************************************************************	#
15080#	a0 = pointer to extended precision source operand		#
15081# 	a1 = pointer to extended precision destination operand		#
15082#									#
15083# OUTPUT **************************************************************	#
15084#	fp0 = result							#
15085#	fp1 = EXOP (if exception occurred)				#
15086#									#
15087# ALGORITHM ***********************************************************	#
15088# 	Handle NANs, infinities, and zeroes as special cases. Divide	#
15089# norms into extended, single, and double precision.			#
15090#	Do subtraction after scaling exponents such that exception won't#
15091# occur. Then, check result exponent to see if exception would have	#
15092# occurred. If so, return default result and maybe EXOP. Else, insert	#
15093# the correct result exponent and return. Set FPSR bits as appropriate.	#
15094#									#
15095#########################################################################
15096
#
# Entry points for the subtract emulation.
#   in: d0 = rnd prec,mode; a0 = src operand ptr; a1 = dst operand ptr
# fssub and fdsub keep only the rounding-mode bits of d0 (mask 0x30) and
# insert single resp. double precision before joining fsub; fssub
# branches, fdsub falls through. fsub uses d0 as given.
#
15097	global		fssub
15098fssub:
15099	andi.b		&0x30,%d0		# clear rnd prec
15100	ori.b		&s_mode*0x10,%d0	# insert sgl prec
15101	bra.b		fsub
15102
15103	global		fdsub
15104fdsub:
15105	andi.b		&0x30,%d0		# clear rnd prec
15106	ori.b		&d_mode*0x10,%d0	# insert dbl prec
15107
#
# Common entry: save the rounding info and build the dispatch index
# DTAG*8 + STAG. A zero index falls through to fsub_norm; anything else
# is handled by fsub_not_norm (defined outside this section).
#
15108	global		fsub
15109fsub:
15110	mov.l		%d0,L_SCR3(%a6)		# store rnd info
15111
15112	clr.w		%d1			# zero the index word
15113	mov.b		DTAG(%a6),%d1		# d1 = dst tag
15114	lsl.b		&0x3,%d1		# dst tag * 8
15115	or.b		STAG(%a6),%d1		# combine src tags
15116
15117	bne.w		fsub_not_norm		# optimize on non-norm input
15118
15119#
15120# SUB: norms and denorms
15121#
#
# Main subtract path. addsub_scaler2 (defined elsewhere) scales the
# operands; as used here it leaves the scaled operands in FP_SCR0/FP_SCR1
# and the scale factor in d0. The result is dst - src, computed on the
# scaled operands under the user's rounding control.
# NOTE(review): fsub_zero_entry is presumably also entered from zero
# operand handlers outside this section, as its fadd counterpart is.
#
15122fsub_norm:
15123	bsr.l		addsub_scaler2		# scale exponents
15124
15125fsub_zero_entry:
15126	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
15127
15128	fmov.l		&0x0,%fpsr		# clear FPSR
15129	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15130
15131	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
15132
15133	fmov.l		&0x0,%fpcr		# clear FPCR
15134	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
15135
15136	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
15137
15138	fbeq.w		fsub_zero_exit		# if result zero, end now
15139
# From here until the matching restore, the stack holds the saved d2
# followed by the 12-byte result; every exit path must pop both.
15140	mov.l		%d2,-(%sp)		# save d2
15141
15142	fmovm.x		&0x01,-(%sp)		# save result to stack
15143
15144	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
15145	lsr.b		&0x6,%d1		# d1 = rnd prec field = table index
15146
15147	mov.w		(%sp),%d2		# fetch new exponent
15148	andi.l		&0x7fff,%d2		# strip sign
15149	sub.l		%d0,%d2			# exp = exp - scale factor
15150
15151	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15152	bge.b		fsub_ovfl		# yes
15153
15154	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15155	blt.w		fsub_unfl		# yes
15156	beq.w		fsub_may_unfl		# maybe; go find out
15157
#
# In-range result: insert the unscaled exponent (d2) under the original
# sign, reload the result into fp0 and restore d2.
#
15158fsub_normal:
15159	mov.w		(%sp),%d1		# fetch {sgn,exp} of result
15160	andi.w		&0x8000,%d1		# keep sign
15161	or.w		%d2,%d1			# concat sign,new exp
15162	mov.w		%d1,(%sp)		# insert new exponent
15163
15164	fmovm.x		(%sp)+,&0x80		# return result in fp0
15165
15166	mov.l		(%sp)+,%d2		# restore d2
15167	rts
15168
# Zero result: fp0 already holds the zero produced by the fsub above and
# the condition codes were merged into USER_FPSR, so just return.
15169fsub_zero_exit:
15170#	fmov.s		&0x00000000,%fp0	# return zero in fp0
15171	rts
15172
#
# Exponent limits used by the fsub range checks, indexed by the rounding
# precision field (order: ext, sgl, dbl). A result whose unscaled exponent
# is >= the tbl_fsub_ovfl entry is treated as an overflow; one below the
# tbl_fsub_unfl entry as an underflow, and one equal to it as a possible
# underflow. The values are identical to tbl_fadd_ovfl / tbl_fadd_unfl.
#
15173tbl_fsub_ovfl:
15174	long		0x7fff			# ext ovfl
15175	long		0x407f			# sgl ovfl
15176	long		0x43ff			# dbl ovfl
15177
15178tbl_fsub_unfl:
15179	long	        0x0000			# ext unfl
15180	long		0x3f81			# sgl unfl
15181	long		0x3c01			# dbl unfl
15182
#
# Overflow handling for fsub.
#   in: stack = 12-byte result on top of the saved d2; d2 = unscaled
#       exponent; d0 = scale factor
# Sets the overflow status bits. With OVFL and INEX both disabled, the
# stacked result is discarded and the default result is returned; otherwise
# fsub_ovfl_ena builds the EXOP first (and pops the result itself).
#
15183fsub_ovfl:
15184	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15185
15186	mov.b		FPCR_ENABLE(%a6),%d1
15187	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
15188	bne.b		fsub_ovfl_ena		# yes
15189
15190	add.l		&0xc,%sp		# discard stacked result
#
# Return the default overflow result. ovf_res is called with d1 = sign
# flag and d0 = prec:rnd; as used here it hands back condition-code bits
# in d0 and a pointer to the result in a0. Only the saved d2 remains on
# the stack at this point.
#
15191fsub_ovfl_dis:
15192	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
15193	sne		%d1			# set sign param accordingly
15194	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
15195	bsr.l		ovf_res			# calculate default result
15196	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
15197	fmovm.x		(%a0),&0x80		# return default result in fp0
15198	mov.l		(%sp)+,%d2		# restore d2
15199	rts
15200
15201fsub_ovfl_ena:
15202	mov.b		L_SCR3(%a6),%d1
15203	andi.b		&0xc0,%d1		# is precision extended?
15204	bne.b		fsub_ovfl_ena_sd	# no
15205
# fsub_ovfl_ena_cont: build the EXOP from the result at (%sp). Keep its sign,
# replace its exponent with (d2 - 0x6000) truncated to 15 bits, pop it into
# fp1, then join fsub_ovfl_dis to produce the default result in fp0.
fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis
15216
# fsub_ovfl_ena_sd: precision is sgl or dbl. Redo the subtract with only the
# rounding-mode bits (0x30) of L_SCR3 in the FPCR, i.e. with the precision
# bits cleared, and replace the result saved on the stack with this new one
# before building the EXOP in fsub_ovfl_ena_cont.
fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# drop the old 12-byte result
	fmovm.x		&0x01,-(%sp)		# push the re-computed result (fp0)
	bra.b		fsub_ovfl_ena_cont
15231
#
# fsub_unfl: the rescaled exponent is below the underflow threshold.
# Set the UNFL exception bit, discard the result saved on the stack and
# redo the subtract with FPCR = rz_mode*0x10. The resulting FPSR is merged
# into USER_FPSR. If UNFL or INEX is enabled, fsub_unfl_ena builds the EXOP
# first; either way fsub_unfl_dis hands the result to unf_res().
#
fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard the 12-byte saved result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# merge status into the user FPSR

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

# fsub_unfl_dis: store fp0 to FP_SCR0, let unf_res() turn it into the default
# underflow result in place, and return that in fp0. d0 (the scale factor)
# is passed through to unf_res() untouched by this code.
fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts
15263
#
# fsub_unfl_ena: UNFL or INEX is enabled; compute the EXOP in fp1.
# The subtract is redone into fp1 using the user's FPCR (extended precision)
# or the user's rounding mode with the precision bits cleared (sgl/dbl).
# The exponent of that result is then un-scaled by d0 and biased by +0x6000.
# fp0 still holds the result computed in fsub_unfl and is consumed by
# fsub_unfl_dis afterwards. d2 is used as scratch here; the caller's d2 is
# still on the stack and is restored in fsub_unfl_dis.
#
fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to FP_SCR0
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

# sgl/dbl precision: use the rounding mode bits only (precision bits cleared)
fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont
15299
15300#
15301# result is equal to the smallest normalized number in the selected precision
15302# if the precision is extended, this result could not have come from an
15303# underflow that rounded up.
15304#
15305fsub_may_unfl:
15306	mov.l		L_SCR3(%a6),%d1
15307	andi.b		&0xc0,%d1		# fetch rnd prec
15308	beq.w		fsub_normal		# yes; no underflow occurred
15309
15310	mov.l		0x4(%sp),%d1
15311	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
15312	bne.w		fsub_normal		# no; no underflow occurred
15313
15314	tst.l		0x8(%sp)		# is lo(man) = 0x0?
15315	bne.w		fsub_normal		# no; no underflow occurred
15316
15317	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15318	beq.w		fsub_normal		# no; no underflow occurred
15319
15320#
15321# ok, so now the result has a exponent equal to the smallest normalized
15322# exponent for the selected precision. also, the mantissa is equal to
15323# 0x8000000000000000 and this mantissa is the result of rounding non-zero
15324# g,r,s.
15325# now, we must determine whether the pre-rounded result was an underflow
15326# rounded "up" or a normalized number rounded "down".
15327# so, we do this be re-executing the add using RZ as the rounding mode and
15328# seeing if the new result is smaller or equal to the current result.
15329#
15330	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
15331
15332	mov.l		L_SCR3(%a6),%d1
15333	andi.b		&0xc0,%d1		# keep rnd prec
15334	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
15335	fmov.l		%d1,%fpcr		# set FPCR
15336	fmov.l		&0x0,%fpsr		# clear FPSR
15337
15338	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
15339
15340	fmov.l		&0x0,%fpcr		# clear FPCR
15341
15342	fabs.x		%fp0			# compare absolute values
15343	fabs.x		%fp1
15344	fcmp.x		%fp0,%fp1		# is first result > second?
15345
15346	fbgt.w		fsub_unfl		# yes; it's an underflow
15347	bra.w		fsub_normal		# no; it's not an underflow
15348
15349##########################################################################
15350
15351#
15352# Sub: inputs are not both normalized; what are they?
15353#
15354fsub_not_norm:
15355	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
15356	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
15357
15358	swbeg		&48
15359tbl_fsub_op:
15360	short		fsub_norm	- tbl_fsub_op # NORM - NORM
15361	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
15362	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
15363	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15364	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
15365	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15366	short		tbl_fsub_op	- tbl_fsub_op #
15367	short		tbl_fsub_op	- tbl_fsub_op #
15368
15369	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
15370	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
15371	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
15372	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15373	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
15374	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15375	short		tbl_fsub_op	- tbl_fsub_op #
15376	short		tbl_fsub_op	- tbl_fsub_op #
15377
15378	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
15379	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
15380	short		fsub_inf_2	- tbl_fsub_op # INF - INF
15381	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15382	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
15383	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15384	short		tbl_fsub_op	- tbl_fsub_op #
15385	short		tbl_fsub_op	- tbl_fsub_op #
15386
15387	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
15388	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
15389	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
15390	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
15391	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
15392	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
15393	short		tbl_fsub_op	- tbl_fsub_op #
15394	short		tbl_fsub_op	- tbl_fsub_op #
15395
15396	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
15397	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
15398	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
15399	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
15400	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
15401	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
15402	short		tbl_fsub_op	- tbl_fsub_op #
15403	short		tbl_fsub_op	- tbl_fsub_op #
15404
15405	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
15406	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
15407	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
15408	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
15409	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
15410	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
15411	short		tbl_fsub_op	- tbl_fsub_op #
15412	short		tbl_fsub_op	- tbl_fsub_op #
15413
# Local trampolines: tbl_fsub_op holds 16-bit offsets, so the table points at
# these nearby labels, which then long-branch to the shared NAN handlers.
fsub_res_qnan:
	bra.l		res_qnan
fsub_res_snan:
	bra.l		res_snan
15418
15419#
15420# both operands are ZEROes
15421#
15422fsub_zero_2:
15423	mov.b		SRC_EX(%a0),%d0
15424	mov.b		DST_EX(%a1),%d1
15425	eor.b		%d1,%d0
15426	bpl.b		fsub_zero_2_chk_rm
15427
15428# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
15429	tst.b		%d0			# is dst negative?
15430	bmi.b		fsub_zero_2_rm		# yes
15431	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15432	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15433	rts
15434
15435#
15436# the ZEROes have the same signs:
15437# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15438# - -ZERO is returned in the case of RM.
15439#
15440fsub_zero_2_chk_rm:
15441	mov.b		3+L_SCR3(%a6),%d1
15442	andi.b		&0x30,%d1		# extract rnd mode
15443	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
15444	beq.b		fsub_zero_2_rm		# yes
15445	fmov.s		&0x00000000,%fp0	# no; return +ZERO
15446	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
15447	rts
15448
15449fsub_zero_2_rm:
15450	fmov.s		&0x80000000,%fp0	# return -ZERO
15451	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
15452	rts
15453
15454#
15455# one operand is a ZERO and the other is a DENORM or a NORM.
15456# scale the DENORM or NORM and jump to the regular fsub routine.
15457#
15458fsub_zero_dst:
15459	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15460	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15461	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15462	bsr.l		scale_to_zero_src	# scale the operand
15463	clr.w		FP_SCR1_EX(%a6)
15464	clr.l		FP_SCR1_HI(%a6)
15465	clr.l		FP_SCR1_LO(%a6)
15466	bra.w		fsub_zero_entry		# go execute fsub
15467
15468fsub_zero_src:
15469	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
15470	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
15471	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
15472	bsr.l		scale_to_zero_dst	# scale the operand
15473	clr.w		FP_SCR0_EX(%a6)
15474	clr.l		FP_SCR0_HI(%a6)
15475	clr.l		FP_SCR0_LO(%a6)
15476	bra.w		fsub_zero_entry		# go execute fsub
15477
15478#
15479# both operands are INFs. an OPERR will result if the INFs have the
15480# same signs. else,
15481#
15482fsub_inf_2:
15483	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
15484	mov.b		DST_EX(%a1),%d1
15485	eor.b		%d1,%d0
15486	bpl.l		res_operr		# weed out (-INF)+(+INF)
15487
15488# ok, so it's not an OPERR. but we do have to remember to return
15489# the src INF since that's where the 881/882 gets the j-bit.
15490
15491fsub_inf_src:
15492	fmovm.x		SRC(%a0),&0x80		# return src INF
15493	fneg.x		%fp0			# invert sign
15494	fbge.w		fsub_inf_done		# sign is now positive
15495	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15496	rts
15497
15498fsub_inf_dst:
15499	fmovm.x		DST(%a1),&0x80		# return dst INF
15500	tst.b		DST_EX(%a1)		# is INF negative?
15501	bpl.b		fsub_inf_done		# no
15502	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15503	rts
15504
15505fsub_inf_done:
15506	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
15507	rts
15508
15509#########################################################################
15510# XDEF ****************************************************************	#
15511# 	fsqrt(): emulates the fsqrt instruction				#
15512#	fssqrt(): emulates the fssqrt instruction			#
15513#	fdsqrt(): emulates the fdsqrt instruction			#
15514#									#
15515# XREF ****************************************************************	#
15516#	scale_sqrt() - scale the source operand				#
15517#	unf_res() - return default underflow result			#
15518#	ovf_res() - return default overflow result			#
15519# 	res_qnan_1op() - return QNAN result				#
15520# 	res_snan_1op() - return SNAN result				#
15521#									#
15522# INPUT ***************************************************************	#
15523#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
15525#									#
15526# OUTPUT **************************************************************	#
15527#	fp0 = result							#
15528#	fp1 = EXOP (if exception occurred)				#
15529#									#
15530# ALGORITHM ***********************************************************	#
15531#	Handle NANs, infinities, and zeroes as special cases. Divide	#
15532# norms/denorms into ext/sgl/dbl precision.				#
15533#	For norms/denorms, scale the exponents such that a sqrt		#
15534# instruction won't cause an exception. Use the regular fsqrt to	#
15535# compute a result. Check if the regular operands would have taken	#
15536# an exception. If so, return the default overflow/underflow result	#
15537# and return the EXOP if exceptions are enabled. Else, scale the 	#
15538# result operand to the proper exponent.				#
15539#									#
15540#########################################################################
15541
# fssqrt / fdsqrt: force single / double rounding precision by keeping only
# the rounding mode bits (0x30) of d0 and inserting the precision, then
# continue in fsqrt (fssqrt by branch, fdsqrt by fall-through).
	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

# fsqrt: common entry. Save rnd prec,mode in L_SCR3 and dispatch on the source
# operand tag; a tag of zero falls through to fsqrt_norm.
	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1		# d1 = src operand tag
	bne.w		fsqrt_not_norm		# optimize on non-norm input
15559
15560#
15561# SQUARE ROOT: norms and denorms ONLY!
15562#
15563fsqrt_norm:
15564	tst.b		SRC_EX(%a0)		# is operand negative?
15565	bmi.l		res_operr		# yes
15566
15567	andi.b		&0xc0,%d0		# is precision extended?
15568	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
15569
15570	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15571	fmov.l		&0x0,%fpsr		# clear FPSR
15572
15573	fsqrt.x		(%a0),%fp0		# execute square root
15574
15575	fmov.l		%fpsr,%d1
15576	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
15577
15578	rts
15579
# fsqrt_denorm: source is a DENORM. A negative operand is an OPERR and sgl/dbl
# precision is handled by fsqrt_not_ext. For extended precision the operand
# is copied to FP_SCR0, scaled by scale_sqrt() (scale factor returned in d0)
# and then handled by the common fsqrt_sd_normal path, which un-scales the
# exponent of the result.
fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	bra.w		fsqrt_sd_normal
15594
15595#
15596# operand is either single or double
15597#
15598fsqrt_not_ext:
15599	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
15600	bne.w		fsqrt_dbl
15601
15602#
15603# operand is to be rounded to single precision
15604#
15605fsqrt_sgl:
15606	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15607	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15608	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15609
15610	bsr.l		scale_sqrt		# calculate scale factor
15611
15612	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
15613	beq.w		fsqrt_sd_may_unfl
15614	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
15615	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
15616	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15617	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15618
15619#
15620# operand will NOT overflow or underflow when moved in to the fp reg file
15621#
15622fsqrt_sd_normal:
15623	fmov.l		&0x0,%fpsr		# clear FPSR
15624	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15625
15626	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
15627
15628	fmov.l		%fpsr,%d1		# save FPSR
15629	fmov.l		&0x0,%fpcr		# clear FPCR
15630
15631	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15632
15633fsqrt_sd_normal_exit:
15634	mov.l		%d2,-(%sp)		# save d2
15635	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
15636	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
15637	mov.l		%d1,%d2			# make a copy
15638	andi.l		&0x7fff,%d1		# strip sign
15639	sub.l		%d0,%d1			# add scale factor
15640	andi.w		&0x8000,%d2		# keep old sign
15641	or.w		%d1,%d2			# concat old sign,new exp
15642	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
15643	mov.l		(%sp)+,%d2		# restore d2
15644	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
15645	rts
15646
15647#
15648# operand is to be rounded to double precision
15649#
15650fsqrt_dbl:
15651	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
15652	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
15653	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
15654
15655	bsr.l		scale_sqrt		# calculate scale factor
15656
15657	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
15658	beq.w		fsqrt_sd_may_unfl
15659	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
15660	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
15661	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
15662	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
15663	bra.w		fsqrt_sd_normal		# no; ho handle normalized op
15664
# we're on the line here and the distinguishing characteristic is whether
# the scaled exponent is 3fff or 3ffe (the two values scale_sqrt() inserts).
# if it's 3fff (low bit set), then it's a safe number;
# otherwise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_normal		# yes, so no underflow

#
# operand WILL underflow when moved in to the fp register file
#
# Set the UNFL exception bit, compute the square root of the scaled operand
# with FPCR = rz_mode*0x10 and merge the resulting FPSR into USER_FPSR.
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x 	FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

# fsqrt_sd_unfl_dis: store fp0 to FP_SCR0, let unf_res() build the default
# underflow result in place, and return that in fp0.
fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
15702
15703#
15704# operand will underflow AND underflow is enabled.
15705# therefore, we must return the result rounded to extended precision.
15706#
15707fsqrt_sd_unfl_ena:
15708	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15709	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15710	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
15711
15712	mov.l		%d2,-(%sp)		# save d2
15713	mov.l		%d1,%d2			# make a copy
15714	andi.l		&0x7fff,%d1		# strip sign
15715	andi.w		&0x8000,%d2		# keep old sign
15716	sub.l		%d0,%d1			# subtract scale factor
15717	addi.l		&0x6000,%d1		# add new bias
15718	andi.w		&0x7fff,%d1
15719	or.w		%d2,%d1			# concat new sign,new exp
15720	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
15721	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
15722	mov.l		(%sp)+,%d2		# restore d2
15723	bra.b		fsqrt_sd_unfl_dis
15724
15725#
15726# operand WILL overflow.
15727#
15728fsqrt_sd_ovfl:
15729	fmov.l		&0x0,%fpsr		# clear FPSR
15730	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15731
15732	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
15733
15734	fmov.l		&0x0,%fpcr		# clear FPCR
15735	fmov.l		%fpsr,%d1		# save FPSR
15736
15737	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15738
15739fsqrt_sd_ovfl_tst:
15740	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15741
15742	mov.b		FPCR_ENABLE(%a6),%d1
15743	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
15744	bne.b		fsqrt_sd_ovfl_ena	# yes
15745
15746#
15747# OVFL is not enabled; therefore, we must create the default result by
15748# calling ovf_res().
15749#
15750fsqrt_sd_ovfl_dis:
15751	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
15752	sne		%d1			# set sign param accordingly
15753	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
15754	bsr.l		ovf_res			# calculate default result
15755	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
15756	fmovm.x		(%a0),&0x80		# return default result in fp0
15757	rts
15758
15759#
15760# OVFL is enabled.
15761# the INEX2 bit has already been updated by the round to the correct precision.
15762# now, round to extended(and don't alter the FPSR).
15763#
15764fsqrt_sd_ovfl_ena:
15765	mov.l		%d2,-(%sp)		# save d2
15766	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
15767	mov.l		%d1,%d2			# make a copy
15768	andi.l		&0x7fff,%d1		# strip sign
15769	andi.w		&0x8000,%d2		# keep old sign
15770	sub.l		%d0,%d1			# add scale factor
15771	subi.l		&0x6000,%d1		# subtract bias
15772	andi.w		&0x7fff,%d1
15773	or.w		%d2,%d1			# concat sign,exp
15774	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
15775	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
15776	mov.l		(%sp)+,%d2		# restore d2
15777	bra.b		fsqrt_sd_ovfl_dis
15778
15779#
15780# the move in MAY underflow. so...
15781#
15782fsqrt_sd_may_ovfl:
15783	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
15784	bne.w		fsqrt_sd_ovfl		# yes, so overflow
15785
15786	fmov.l		&0x0,%fpsr		# clear FPSR
15787	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
15788
15789	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
15790
15791	fmov.l		%fpsr,%d1		# save status
15792	fmov.l		&0x0,%fpcr		# clear FPCR
15793
15794	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
15795
15796	fmov.x		%fp0,%fp1		# make a copy of result
15797	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
15798	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
15799
15800# no, it didn't overflow; we have correct result
15801	bra.w		fsqrt_sd_normal_exit
15802
15803##########################################################################
15804
15805#
15806# input is not normalized; what is it?
15807#
15808fsqrt_not_norm:
15809	cmpi.b		%d1,&DENORM		# weed out DENORM
15810	beq.w		fsqrt_denorm
15811	cmpi.b		%d1,&ZERO		# weed out ZERO
15812	beq.b		fsqrt_zero
15813	cmpi.b		%d1,&INF		# weed out INF
15814	beq.b		fsqrt_inf
15815	cmpi.b		%d1,&SNAN		# weed out SNAN
15816	beq.l		res_snan_1op
15817	bra.l		res_qnan_1op
15818
15819#
15820# 	fsqrt(+0) = +0
15821# 	fsqrt(-0) = -0
15822#	fsqrt(+INF) = +INF
15823# 	fsqrt(-INF) = OPERR
15824#
15825fsqrt_zero:
15826	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
15827	bmi.b		fsqrt_zero_m		# negative
15828fsqrt_zero_p:
15829	fmov.s		&0x00000000,%fp0	# return +ZERO
15830	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
15831	rts
15832fsqrt_zero_m:
15833	fmov.s		&0x80000000,%fp0	# return -ZERO
15834	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
15835	rts
15836
15837fsqrt_inf:
15838	tst.b		SRC_EX(%a0)		# is INF positive or negative?
15839	bmi.l		res_operr		# negative
15840fsqrt_inf_p:
15841	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
15842	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
15843	rts
15844
15845##########################################################################
15846
15847#########################################################################
15848# XDEF ****************************************************************	#
15849#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
15850#			  OVFL/UNFL exceptions will result		#
15851#									#
15852# XREF ****************************************************************	#
15853#	norm() - normalize mantissa after adjusting exponent		#
15854#									#
15855# INPUT ***************************************************************	#
15856#	FP_SRC(a6) = fp op1(src)					#
15857#	FP_DST(a6) = fp op2(dst)					#
15858# 									#
15859# OUTPUT **************************************************************	#
15860#	FP_SRC(a6) = fp op1 scaled(src)					#
15861#	FP_DST(a6) = fp op2 scaled(dst)					#
15862#	d0         = scale amount					#
15863#									#
15864# ALGORITHM ***********************************************************	#
15865# 	If the DST exponent is > the SRC exponent, set the DST exponent	#
15866# equal to 0x3fff and scale the SRC exponent by the value that the	#
15867# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
15868# do the opposite. Return this scale factor in d0.			#
15869#	If the two exponents differ by > the number of mantissa bits	#
15870# plus two, then set the smallest exponent to a very small value as a	#
15871# quick shortcut.							#
15872#									#
15873#########################################################################
15874
# addsub_scaler2:
#   a0 = pointer to src operand, a1 = pointer to dst operand.
#   Copies src to FP_SCR0 and dst to FP_SCR1, then scales the operand with
#   the larger exponent to 0x3fff and applies the same scale factor to the
#   other one. Returns the scale factor in d0.
#   Scratch: L_SCR1(a6) holds the src exponent in its upper word and the dst
#   exponent in its lower word (2+L_SCR1); the scale factor is kept on the
#   stack while the second operand is adjusted.
	global		addsub_scaler2
addsub_scaler2:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0		# strip src sign
	andi.w		&0x7fff,%d1		# strip dst sign
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is >  src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new src exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
						# (low word of the saved longword)
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1		# keep src sign
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponents are too far apart for src to matter beyond rounding:
# keep the src sign and force its exponent to 1
quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
						# (low word of the saved longword)
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1		# keep dst sign
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponents are too far apart for dst to matter beyond rounding:
# keep the dst sign and force its exponent to 1
quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts
15966
15967##########################################################################
15968
15969#########################################################################
15970# XDEF ****************************************************************	#
15971#	scale_to_zero_src(): scale the exponent of extended precision	#
15972#			     value at FP_SCR0(a6).			#
15973#									#
15974# XREF ****************************************************************	#
15975#	norm() - normalize the mantissa if the operand was a DENORM	#
15976#									#
15977# INPUT ***************************************************************	#
15978#	FP_SCR0(a6) = extended precision operand to be scaled		#
15979# 									#
15980# OUTPUT **************************************************************	#
15981#	FP_SCR0(a6) = scaled extended precision operand			#
15982#	d0	    = scale value					#
15983#									#
15984# ALGORITHM ***********************************************************	#
15985# 	Set the exponent of the input operand to 0x3fff. Save the value	#
15986# of the difference between the original and new exponent. Then, 	#
15987# normalize the operand if it was a DENORM. Add this normalization	#
15988# value to the previous value. Return the result.			#
15989#									#
15990#########################################################################
15991
# scale_to_zero_src: force the exponent of FP_SCR0 to 0x3fff (sign kept) and
# return in d0 the scale factor 0x3fff - (original exponent). For a DENORM
# (per STAG) the mantissa is normalized by norm() and the "original
# exponent" used is -(shift count returned by norm).
	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
	beq.b		stzs_denorm		# normalize the DENORM

# d1 = operand's exponent on entry to stzs_norm
stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# d1 = exponent expected by stzs_norm
	bra.b		stzs_norm		# finish scaling
16019
16020###
16021
16022#########################################################################
16023# XDEF ****************************************************************	#
16024#	scale_sqrt(): scale the input operand exponent so a subsequent	#
16025#		      fsqrt operation won't take an exception.		#
16026#									#
16027# XREF ****************************************************************	#
16028#	norm() - normalize the mantissa if the operand was a DENORM	#
16029#									#
16030# INPUT ***************************************************************	#
16031#	FP_SCR0(a6) = extended precision operand to be scaled		#
16032# 									#
16033# OUTPUT **************************************************************	#
16034#	FP_SCR0(a6) = scaled extended precision operand			#
16035#	d0	    = scale value					#
16036#									#
16037# ALGORITHM ***********************************************************	#
16038#	If the input operand is a DENORM, normalize it.			#
# 	If the exponent of the input operand is even, set the exponent	#
# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the 	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(0x3fff-exp)/2". 				#
16043#									#
16044#########################################################################
16045
# scale_sqrt: replace the exponent of FP_SCR0 with 0x3fff or 0x3ffe, chosen
# by the parity of the original exponent so that the difference is even, and
# return half of that difference in d0 as the scale factor.
# For a DENORM (per STAG) the mantissa is first normalized by norm() and the
# shift count it returns in d0 plays the role of the negated exponent.
	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
	beq.b		ss_denorm		# normalize the DENORM

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

# odd exponent: new exponent is 0x3fff
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

# even exponent: new exponent is 0x3ffe
ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

# DENORM: d0 = shift count returned by norm().
# NOTE(review): the ori.w below only sets bits in FP_SCR0_EX, so this path
# relies on the exponent field being zero at that point -- confirm against
# norm().
ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	add.l		&0x3fff,%d0		# scale = BIAS + shft val
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	add.l		&0x3ffe,%d0		# scale = (BIAS-1) + shft val
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts
16093
16094###
16095
16096#########################################################################
16097# XDEF ****************************************************************	#
16098#	scale_to_zero_dst(): scale the exponent of extended precision	#
16099#			     value at FP_SCR1(a6).			#
16100#									#
16101# XREF ****************************************************************	#
16102#	norm() - normalize the mantissa if the operand was a DENORM	#
16103#									#
16104# INPUT ***************************************************************	#
16105#	FP_SCR1(a6) = extended precision operand to be scaled		#
16106# 									#
16107# OUTPUT **************************************************************	#
16108#	FP_SCR1(a6) = scaled extended precision operand			#
16109#	d0	    = scale value					#
16110#									#
16111# ALGORITHM ***********************************************************	#
16112# 	Set the exponent of the input operand to 0x3fff. Save the value	#
16113# of the difference between the original and new exponent. Then, 	#
16114# normalize the operand if it was a DENORM. Add this normalization	#
16115# value to the previous value. Return the result.			#
16116#									#
16117#########################################################################
16118
16119	global		scale_to_zero_dst
scale_to_zero_dst:
# Rebias the destination operand in FP_SCR1 so that its exponent field
# reads 0x3fff while its sign bit is kept.
#   in : FP_SCR1(a6) = extended precision dst operand, DTAG(a6) = its tag
#   out: FP_SCR1(a6) = rescaled operand, d0 = 0x3fff - (exponent used)
#   d1 is scratch; a0 is loaded only on the DENORM path.
	mov.w		FP_SCR1_EX(%a6),%d1	# d1 = {sgn,exp} word of dst
	mov.w		%d1,%d0			# d0 = working copy for the sign

	andi.w		&0x8000,%d0		# keep only the sign bit
	or.w		&0x3fff,%d0		# splice in 0x3fff as the new exponent
	mov.w		%d0,FP_SCR1_EX(%a6)	# write {sgn,0x3fff} back to dst

	andi.l		&0x7fff,%d1		# d1 = old exponent, upper bits cleared

	cmpi.b		DTAG(%a6),&DENORM	# was the dst tagged DENORM?
	bne.b		stzd_norm		# no; d1 already holds the exponent

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# a0 -> dst operand for norm()
	bsr.l		norm			# normalize the denorm
	neg.l		%d0			# exponent to use = -(value from norm)
	mov.l		%d0,%d1			# hand it to the common tail in d1

stzd_norm:
	mov.l		&0x3fff,%d0		# d0 = BIAS
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts
16145
16146##########################################################################
16147
16148#########################################################################
16149# XDEF ****************************************************************	#
16150#	res_qnan(): return default result w/ QNAN operand for dyadic	#
16151#	res_snan(): return default result w/ SNAN operand for dyadic	#
16152#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
16153#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
16154#									#
16155# XREF ****************************************************************	#
16156#	None								#
16157#									#
16158# INPUT ***************************************************************	#
16159#	FP_SRC(a6) = pointer to extended precision src operand		#
16160#	FP_DST(a6) = pointer to extended precision dst operand		#
16161# 									#
16162# OUTPUT **************************************************************	#
16163#	fp0 = default result						#
16164#									#
16165# ALGORITHM ***********************************************************	#
16166# 	If either operand (but not both operands) of an operation is a	#
16167# nonsignalling NAN, then that NAN is returned as the result. If both	#
16168# operands are nonsignalling NANs, then the destination operand 	#
16169# nonsignalling NAN is returned as the result.				#
16170# 	If either operand to an operation is a signalling NAN (SNAN),	#
16171# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
16172# enable bit is set in the FPCR, then the trap is taken and the 	#
16173# destination is not modified. If the SNAN trap enable bit is not set,	#
16174# then the SNAN is converted to a nonsignalling NAN (by setting the 	#
16175# SNAN bit in the operand to one), and the operation continues as 	#
16176# described in the preceding paragraph, for nonsignalling NANs.		#
16177#	Make sure the appropriate FPSR bits are set before exiting.	#
16178#									#
16179#########################################################################
16180
16181	global		res_qnan
16182	global		res_snan
res_qnan:
res_snan:
# Dyadic entry: pick the NAN operand to return in fp0. The dst operand is
# examined first (SNAN, then QNAN); otherwise the src operand is used.
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
	beq.b		dst_qnan2
src_nan:
	cmp.b		STAG(%a6), &QNAN	# dst is not a NAN; is the src a QNAN?
	beq.b		src_qnan2		# yes; otherwise fall into the SNAN case
	global		res_snan_1op
res_snan_1op:
src_snan2:
# Monadic SNAN entry; also reached by fall-through for a dyadic src SNAN.
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from the src operand
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
# Monadic QNAN entry; only the NAN condition code is recorded.
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from the src operand
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea		FP_DST(%a6), %a0	# result comes from the dst operand
	bra.b		nan_comp
dst_qnan2:
# dst QNAN is returned; a src SNAN still raises SNAN and AIOP.
	lea		FP_DST(%a6), %a0
	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
	bne		nan_done		# no; only the NAN cc is needed
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
# Common exit: a0 points at the chosen operand.
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)	# yes; record it in the user FPSR
nan_not_neg:
	fmovm.x		(%a0), &0x80		# load the chosen NAN as the fp0 result
	rts
16224
16225#########################################################################
16226# XDEF ****************************************************************	#
16227# 	res_operr(): return default result during operand error		#
16228#									#
16229# XREF ****************************************************************	#
16230#	None								#
16231#									#
16232# INPUT ***************************************************************	#
16233#	None								#
16234# 									#
16235# OUTPUT **************************************************************	#
16236#	fp0 = default operand error result				#
16237#									#
16238# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
16240# an operand error occurs for the following cases:			#
16241#									#
16242# 	Multiply: (Infinity x Zero)					#
16243# 	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
16244#									#
16245#########################################################################
16246
16247	global		res_operr
res_operr:
# Record the operand error in the user FPSR (NAN cc, OPERR and AIOP) and
# return the constant stored at nan_return as the fp0 result.
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# load the default NAN
	rts

nan_return:
# Default result: exponent word 0x7fff, all 64 mantissa bits set.
	long		0x7fff0000, 0xffffffff, 0xffffffff
16255
16256#########################################################################
16257# fdbcc(): routine to emulate the fdbcc instruction			#
16258#									#
16259# XDEF **************************************************************** #
16260#	_fdbcc()							#
16261#									#
16262# XREF **************************************************************** #
16263#	fetch_dreg() - fetch Dn value					#
16264#	store_dreg_l() - store updated Dn value				#
16265#									#
16266# INPUT ***************************************************************	#
16267#	d0 = displacement						#
16268#									#
16269# OUTPUT ************************************************************** #
16270#	none								#
16271#									#
16272# ALGORITHM ***********************************************************	#
16273#	This routine checks which conditional predicate is specified by	#
16274# the stacked fdbcc instruction opcode and then branches to a routine	#
16275# for that predicate. The corresponding fbcc instruction is then used	#
16276# to see whether the condition (specified by the stacked FPSR) is true	#
16277# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged and the predicate is false, then	#
# Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
# the displacement value to the stacked PC so that when an "rte" is	#
# finally executed, the branch occurs.					#
16285#									#
16286#########################################################################
16287	global		_fdbcc
_fdbcc:
# Entry point for FDBcc emulation. Copies the stacked FP condition codes
# into the live FPSR, then dispatches through tbl_fdbcc on the predicate
# so the handler can evaluate it with the matching fbcc instruction.
#   in : d0 = displacement (saved in L_SCR1 for fdbcc_false)
	mov.l		%d0,L_SCR1(%a6)		# save displacement

# NOTE(review): d0.w is used unmasked as the table index below, so the
# word at EXC_CMDREG is presumably just the predicate (0-31) -- confirm.
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
	jmp		(tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine

# One 16-bit entry per predicate: the handler's offset from tbl_fdbcc.
tbl_fdbcc:
	short		fdbcc_f		-	tbl_fdbcc	# 00
	short		fdbcc_eq	-	tbl_fdbcc	# 01
	short		fdbcc_ogt	-	tbl_fdbcc	# 02
	short		fdbcc_oge	-	tbl_fdbcc	# 03
	short		fdbcc_olt	-	tbl_fdbcc	# 04
	short		fdbcc_ole	-	tbl_fdbcc	# 05
	short		fdbcc_ogl	-	tbl_fdbcc	# 06
	short		fdbcc_or	-	tbl_fdbcc	# 07
	short		fdbcc_un	-	tbl_fdbcc	# 08
	short		fdbcc_ueq	-	tbl_fdbcc	# 09
	short		fdbcc_ugt	-	tbl_fdbcc	# 10
	short		fdbcc_uge	-	tbl_fdbcc	# 11
	short		fdbcc_ult	-	tbl_fdbcc	# 12
	short		fdbcc_ule	-	tbl_fdbcc	# 13
	short		fdbcc_neq	-	tbl_fdbcc	# 14
	short		fdbcc_t		-	tbl_fdbcc	# 15
	short		fdbcc_sf	-	tbl_fdbcc	# 16
	short		fdbcc_seq	-	tbl_fdbcc	# 17
	short		fdbcc_gt	-	tbl_fdbcc	# 18
	short		fdbcc_ge	-	tbl_fdbcc	# 19
	short		fdbcc_lt	-	tbl_fdbcc	# 20
	short		fdbcc_le	-	tbl_fdbcc	# 21
	short		fdbcc_gl	-	tbl_fdbcc	# 22
	short		fdbcc_gle	-	tbl_fdbcc	# 23
	short		fdbcc_ngle	-	tbl_fdbcc	# 24
	short		fdbcc_ngl	-	tbl_fdbcc	# 25
	short		fdbcc_nle	-	tbl_fdbcc	# 26
	short		fdbcc_nlt	-	tbl_fdbcc	# 27
	short		fdbcc_nge	-	tbl_fdbcc	# 28
	short		fdbcc_ngt	-	tbl_fdbcc	# 29
	short		fdbcc_sneq	-	tbl_fdbcc	# 30
	short		fdbcc_st	-	tbl_fdbcc	# 31
16334
16335#########################################################################
16336#									#
16337# IEEE Nonaware tests							#
16338#									#
16339# For the IEEE nonaware tests, only the false branch changes the 	#
16340# counter. However, the true branch may set bsun so we check to see	#
16341# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16342#									#
16343# The cases EQ and NE are shared by the Aware and Nonaware groups	#
16344# and are incapable of setting the BSUN exception bit.			#
16345#									#
16346# Typically, only one of the two possible branch directions could	#
16347# have the NAN bit set.							#
16348# (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
16349#  is preserved.)							#
16350#									#
16351#########################################################################
16352
16353#
16354# equal:
16355#
16356#	Z
16357#
fdbcc_eq:
# FDBEQ: tested with fbeq on the cc that _fdbcc loaded into the FPSR.
# No BSUN bookkeeping is done here.
	fbeq.w		fdbcc_eq_yes		# equal?
fdbcc_eq_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_eq_yes:
	rts					# yes; do nothing
16364
16365#
16366# not equal:
16367#	_
16368#	Z
16369#
fdbcc_neq:
# FDBNE: tested with fbneq on the cc that _fdbcc loaded into the FPSR.
# No BSUN bookkeeping is done here.
	fbneq.w		fdbcc_neq_yes		# not equal?
fdbcc_neq_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_neq_yes:
	rts					# yes; do nothing
16376
16377#
16378# greater than:
16379#	_______
16380#	NANvZvN
16381#
fdbcc_gt:
# Non-aware "greater than". A true predicate leaves Dn and the PC alone.
# A false predicate with NAN set in the stacked cc records BSUN and AIOP
# in USER_FPSR and, when BSUN is enabled, goes to fdbcc_bsun instead of
# updating the counter.
	fbgt.w		fdbcc_gt_yes		# predicate true; nothing to do
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; counter update only
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_gt_yes:
	rts
16392
16393#
16394# not greater than:
16395#
16396#	NANvZvN
16397#
fdbcc_ngt:
# Non-aware "not greater than". Only the true outcome checks for NAN:
# BSUN and AIOP are recorded, and an enabled BSUN goes to fdbcc_bsun.
	fbngt.w		fdbcc_ngt_yes		# not greater than?
fdbcc_ngt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ngt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		fdbcc_ngt_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_ngt_done:
	rts					# no; do nothing
16410
16411#
16412# greater than or equal:
16413#	   _____
16414#	Zv(NANvN)
16415#
fdbcc_ge:
# Non-aware "greater than or equal". On either outcome, NAN in the
# stacked cc records BSUN and AIOP, and an enabled BSUN goes to
# fdbcc_bsun. Otherwise: true is a no-op, false updates the counter.
	fbge.w		fdbcc_ge_yes		# predicate true?
fdbcc_ge_no:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; counter update only
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_ge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.b		fdbcc_ge_yes_done	# no; nothing to do
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_ge_yes_done:
	rts
16433
16434#
16435# not (greater than or equal):
16436#	       _
16437#	NANv(N^Z)
16438#
fdbcc_nge:
# Non-aware "not (greater than or equal)". Only the true outcome checks
# for NAN: BSUN and AIOP are recorded, and an enabled BSUN goes to
# fdbcc_bsun.
	fbnge.w		fdbcc_nge_yes		# not (greater than or equal)?
fdbcc_nge_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_nge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		fdbcc_nge_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_nge_done:
	rts					# no; do nothing
16451
16452#
16453# less than:
16454#	   _____
16455#	N^(NANvZ)
16456#
fdbcc_lt:
# Non-aware "less than". A true predicate is a no-op. A false predicate
# with NAN set records BSUN and AIOP and, when BSUN is enabled, goes to
# fdbcc_bsun instead of updating the counter.
	fblt.w		fdbcc_lt_yes		# predicate true; nothing to do
fdbcc_lt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; counter update only
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_lt_yes:
	rts
16468
16469#
16470# not less than:
16471#	       _
16472#	NANv(ZvN)
16473#
fdbcc_nlt:
# Non-aware "not less than". Only the true outcome checks for NAN: BSUN
# and AIOP are recorded, and an enabled BSUN goes to fdbcc_bsun.
	fbnlt.w		fdbcc_nlt_yes		# not less than?
fdbcc_nlt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_nlt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		fdbcc_nlt_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_nlt_done:
	rts					# no; do nothing
16486
16487#
16488# less than or equal:
16489#	     ___
16490#	Zv(N^NAN)
16491#
fdbcc_le:
# Non-aware "less than or equal". On either outcome, NAN in the stacked
# cc records BSUN and AIOP, and an enabled BSUN goes to fdbcc_bsun.
# Otherwise: true is a no-op, false updates the counter.
	fble.w		fdbcc_le_yes		# predicate true?
fdbcc_le_no:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; counter update only
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_le_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.b		fdbcc_le_yes_done	# no; nothing to do
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_le_yes_done:
	rts
16509
16510#
16511# not (less than or equal):
16512#	     ___
16513#	NANv(NvZ)
16514#
fdbcc_nle:
# Non-aware "not (less than or equal)". Only the true outcome checks for
# NAN: BSUN and AIOP are recorded, and an enabled BSUN goes to fdbcc_bsun.
	fbnle.w		fdbcc_nle_yes		# not (less than or equal)?
fdbcc_nle_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_nle_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fdbcc_nle_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_nle_done:
	rts					# no; do nothing
16527
16528#
16529# greater or less than:
16530#	_____
16531#	NANvZ
16532#
fdbcc_gl:
# Non-aware "greater or less than". A true predicate is a no-op. A false
# predicate with NAN set records BSUN and AIOP and, when BSUN is enabled,
# goes to fdbcc_bsun instead of updating the counter.
	fbgl.w		fdbcc_gl_yes		# predicate true; nothing to do
fdbcc_gl_no:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; counter update only
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_gl_yes:
	rts
16544
16545#
16546# not (greater or less than):
16547#
16548#	NANvZ
16549#
fdbcc_ngl:
# Non-aware "not (greater or less than)". Only the true outcome checks
# for NAN: BSUN and AIOP are recorded, and an enabled BSUN goes to
# fdbcc_bsun.
	fbngl.w		fdbcc_ngl_yes		# not (greater or less than)?
fdbcc_ngl_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ngl_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		fdbcc_ngl_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_ngl_done:
	rts					# no; do nothing
16562
16563#
16564# greater, less, or equal:
16565#	___
16566#	NAN
16567#
fdbcc_gle:
# Non-aware "greater, less, or equal". The false outcome records BSUN and
# AIOP without a separate NAN test, then either goes to fdbcc_bsun (BSUN
# enabled) or updates the counter.
	fbgle.w		fdbcc_gle_yes		# predicate true; nothing to do
fdbcc_gle_no:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; go handle counter
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_gle_yes:
	rts
16577
16578#
16579# not (greater, less, or equal):
16580#
16581#	NAN
16582#
fdbcc_ngle:
# Non-aware "not (greater, less, or equal)". The true outcome records BSUN
# and AIOP without a separate NAN test; an enabled BSUN goes to
# fdbcc_bsun.
	fbngle.w	fdbcc_ngle_yes		# not (greater, less, or equal)?
fdbcc_ngle_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ngle_yes:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
	rts					# no; do nothing
16592
16593#########################################################################
16594#									#
16595# Miscellaneous tests							#
16596#									#
16597# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16598#									#
16599#########################################################################
16600
16601#
16602# false:
16603#
16604#	False
16605#
fdbcc_f:					# no bsun possible
# FDBF: the predicate is always false, so the counter is always updated.
	bra.w		fdbcc_false		# go handle counter
16608
16609#
16610# true:
16611#
16612#	True
16613#
fdbcc_t:					# no bsun possible
# FDBT: the predicate is always true, so neither Dn nor the PC is touched.
	rts					# do nothing
16616
16617#
16618# signalling false:
16619#
16620#	False
16621#
fdbcc_sf:
# Signalling false: always takes the counter path, unless NAN is set and
# BSUN is enabled, in which case it goes to fdbcc_bsun. BSUN and AIOP are
# recorded whenever NAN is set.
	btst		&nan_bit, FPSR_CC(%a6) 	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; go handle counter
	bra.w		fdbcc_bsun		# yes; we have an exception
16629
16630#
16631# signalling true:
16632#
16633#	True
16634#
fdbcc_st:
# Signalling true: never updates the counter. With NAN set, BSUN and AIOP
# are recorded and an enabled BSUN goes to fdbcc_bsun.
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
	beq.b		fdbcc_st_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_st_done:
	rts					# predicate true; do nothing
16643
16644#
16645# signalling equal:
16646#
16647#	Z
16648#
fdbcc_seq:
# Signalling equal. On either outcome, NAN in the stacked cc records BSUN
# and AIOP, and an enabled BSUN goes to fdbcc_bsun. Otherwise: true is a
# no-op, false updates the counter.
	fbseq.w		fdbcc_seq_yes		# predicate true?
fdbcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6) 	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; go handle counter
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6) 	# NAN set in the stacked cc?
	beq.b		fdbcc_seq_yes_done	# no; nothing to do
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_seq_yes_done:
	rts
16666
16667#
16668# signalling not equal:
16669#	_
16670#	Z
16671#
fdbcc_sneq:
# Signalling not equal. On either outcome, NAN in the stacked cc records
# BSUN and AIOP, and an enabled BSUN goes to fdbcc_bsun. Otherwise: true
# is a no-op, false updates the counter.
	fbsneq.w	fdbcc_sneq_yes		# predicate true?
fdbcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6) 	# NAN set in the stacked cc?
	beq.w		fdbcc_false		# no; plain counter update
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		fdbcc_false		# no; go handle counter
	bra.w		fdbcc_bsun		# yes; we have an exception
fdbcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6) 	# NAN set in the stacked cc?
	beq.w		fdbcc_sneq_done		# no; nothing to do
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_sneq_done:
	rts
16689
16690#########################################################################
16691#									#
16692# IEEE Aware tests							#
16693#									#
# For the IEEE aware tests, action is only taken if the result is false.#
# Therefore, the predicate's own fbcc branches around the jump to the	#
# decrement routine (fdbcc_false).					#
16697# The BSUN exception will not be set for any of these tests.		#
16698#									#
16699#########################################################################
16700
16701#
16702# ordered greater than:
16703#	_______
16704#	NANvZvN
16705#
fdbcc_ogt:
# IEEE-aware test: evaluated with fbogt; this handler does no NAN/BSUN
# bookkeeping.
	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
fdbcc_ogt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ogt_yes:
	rts					# yes; do nothing
16712
16713#
16714# unordered or less or equal:
#
16716#	NANvZvN
16717#
fdbcc_ule:
# IEEE-aware test: evaluated with fbule; this handler does no NAN/BSUN
# bookkeeping.
	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
fdbcc_ule_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ule_yes:
	rts					# yes; do nothing
16724
16725#
16726# ordered greater than or equal:
16727#	   _____
16728#	Zv(NANvN)
16729#
fdbcc_oge:
# IEEE-aware test: evaluated with fboge; this handler does no NAN/BSUN
# bookkeeping.
	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
fdbcc_oge_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_oge_yes:
	rts					# yes; do nothing
16736
16737#
16738# unordered or less than:
16739#	       _
16740#	NANv(N^Z)
16741#
fdbcc_ult:
# IEEE-aware test: evaluated with fbult; this handler does no NAN/BSUN
# bookkeeping.
	fbult.w		fdbcc_ult_yes		# unordered or less than?
fdbcc_ult_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ult_yes:
	rts					# yes; do nothing
16748
16749#
16750# ordered less than:
16751#	   _____
16752#	N^(NANvZ)
16753#
fdbcc_olt:
# IEEE-aware test: evaluated with fbolt; this handler does no NAN/BSUN
# bookkeeping.
	fbolt.w		fdbcc_olt_yes		# ordered less than?
fdbcc_olt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_olt_yes:
	rts					# yes; do nothing
16760
16761#
16762# unordered or greater or equal:
#	      _
16764#	NANvZvN
16765#
fdbcc_uge:
# IEEE-aware test: evaluated with fbuge; this handler does no NAN/BSUN
# bookkeeping.
	fbuge.w		fdbcc_uge_yes		# unordered or greater or equal?
fdbcc_uge_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_uge_yes:
	rts					# yes; do nothing
16772
16773#
16774# ordered less than or equal:
16775#	     ___
16776#	Zv(N^NAN)
16777#
fdbcc_ole:
# IEEE-aware test: evaluated with fbole; this handler does no NAN/BSUN
# bookkeeping.
	fbole.w		fdbcc_ole_yes		# ordered less than or equal?
fdbcc_ole_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ole_yes:
	rts					# yes; do nothing
16784
16785#
16786# unordered or greater than:
16787#	     ___
16788#	NANv(NvZ)
16789#
fdbcc_ugt:
# IEEE-aware test: evaluated with fbugt; this handler does no NAN/BSUN
# bookkeeping.
	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
fdbcc_ugt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ugt_yes:
	rts					# yes; do nothing
16796
16797#
16798# ordered greater or less than:
16799#	_____
16800#	NANvZ
16801#
fdbcc_ogl:
# IEEE-aware test: evaluated with fbogl; this handler does no NAN/BSUN
# bookkeeping.
	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
fdbcc_ogl_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ogl_yes:
	rts					# yes; do nothing
16808
16809#
16810# unordered or equal:
16811#
16812#	NANvZ
16813#
fdbcc_ueq:
# IEEE-aware test: evaluated with fbueq; this handler does no NAN/BSUN
# bookkeeping.
	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
fdbcc_ueq_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ueq_yes:
	rts					# yes; do nothing
16820
16821#
16822# ordered:
16823#	___
16824#	NAN
16825#
fdbcc_or:
# IEEE-aware test: evaluated with fbor; this handler does no NAN/BSUN
# bookkeeping.
	fbor.w		fdbcc_or_yes		# ordered?
fdbcc_or_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_or_yes:
	rts					# yes; do nothing
16832
16833#
16834# unordered:
16835#
16836#	NAN
16837#
fdbcc_un:
# IEEE-aware test: evaluated with fbun; this handler does no NAN/BSUN
# bookkeeping.
	fbun.w		fdbcc_un_yes		# unordered?
fdbcc_un_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_un_yes:
	rts					# yes; do nothing
16844
16845#######################################################################
16846
16847#
16848# the bsun exception bit was not set.
16849#
16850# (1) subtract 1 from the count register
16851# (2) if (cr == -1) then
16852#	pc = pc of next instruction
16853#     else
16854#	pc += sign_ext(16-bit displacement)
16855#
fdbcc_false:
# Counter path for a false predicate: decrement the low word of the count
# register named in the opword and, unless it became -1, point the stacked
# PC at the branch target.
	mov.b		1+EXC_OPWORD(%a6), %d1	# fetch lo opword
	andi.w		&0x7, %d1		# extract count register

	bsr.l		fetch_dreg		# fetch count value
# make sure that d0 isn't corrupted between calls...
# NOTE(review): d1 is reused as the register number for store_dreg_l, so
# fetch_dreg presumably leaves it intact -- confirm.

	subq.w		&0x1, %d0		# Dn - 1 -> Dn

	bsr.l		store_dreg_l		# store new count value

# d0 is compared after the store, so store_dreg_l must not change it.
	cmpi.w		%d0, &-0x1		# is (Dn == -1)?
	bne.b		fdbcc_false_cont	# no;
	rts					# yes; EXC_PC is left untouched

fdbcc_false_cont:
# new PC = USER_FPIAR + 4 + displacement saved by _fdbcc in L_SCR1
	mov.l		L_SCR1(%a6),%d0		# fetch displacement
	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
	addq.l		&0x4,%d0		# add instruction length
	mov.l		%d0,EXC_PC(%a6)		# set new PC
	rts
16877
16878# the emulation routine set bsun and BSUN was enabled. have to
16879# fix stack and jump to the bsun handler.
16880# let the caller of this routine shift the stack frame up to
16881# eliminate the effective address field.
fdbcc_bsun:
# Only marks the condition for the caller; Dn and EXC_PC are not touched.
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)	# tell the caller BSUN must be taken
	rts
16885
16886#########################################################################
16887# ftrapcc(): routine to emulate the ftrapcc instruction			#
16888#									#
16889# XDEF ****************************************************************	#
16890#	_ftrapcc()							#
16891#									#
16892# XREF ****************************************************************	#
16893#	none								#
16894#									#
16895# INPUT *************************************************************** #
16896#	none								#
16897#									#
16898# OUTPUT ************************************************************** #
16899#	none								#
16900#									#
16901# ALGORITHM *********************************************************** #
16902#	This routine checks which conditional predicate is specified by	#
16903# the stacked ftrapcc instruction opcode and then branches to a routine	#
16904# for that predicate. The corresponding fbcc instruction is then used	#
16905# to see whether the condition (specified by the stacked FPSR) is true	#
16906# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
16908# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
16909# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
16910# enabled BSUN should not be flagged and the predicate is true, then	#
16911# the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
16912# flags indicate to the calling routine to emulate the exceptional	#
16913# condition.								#
16914#									#
16915#########################################################################
16916
16917	global		_ftrapcc
_ftrapcc:
# Entry point for FTRAPcc emulation. Copies the stacked FP condition codes
# into the live FPSR, then dispatches through tbl_ftrapcc on the predicate
# so the handler can evaluate it with the matching fbcc instruction.
# NOTE(review): d0.w is used unmasked as the table index below, so the
# word at EXC_CMDREG is presumably just the predicate (0-31) -- confirm.
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine

# One 16-bit entry per predicate: the handler's offset from tbl_ftrapcc.
tbl_ftrapcc:
	short		ftrapcc_f	-	tbl_ftrapcc	# 00
	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
	short		ftrapcc_or	-	tbl_ftrapcc	# 07
	short		ftrapcc_un	-	tbl_ftrapcc	# 08
	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
	short		ftrapcc_t	-	tbl_ftrapcc	# 15
	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
	short		ftrapcc_le	-	tbl_ftrapcc	# 21
	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
	short		ftrapcc_st	-	tbl_ftrapcc	# 31
16962
16963#########################################################################
16964#									#
16965# IEEE Nonaware tests							#
16966#									#
16967# For the IEEE nonaware tests, we set the result based on the		#
16968# floating point condition codes. In addition, we check to see		#
16969# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
16970#									#
16971# The cases EQ and NE are shared by the Aware and Nonaware groups	#
16972# and are incapable of setting the BSUN exception bit.			#
16973#									#
16974# Typically, only one of the two possible branch directions could	#
16975# have the NAN bit set.							#
16976#									#
16977#########################################################################
16978
16979#
16980# equal:
16981#
16982#	Z
16983#
ftrapcc_eq:
# FTRAPEQ: a true predicate goes straight to ftrapcc_trap; no BSUN
# bookkeeping is done here.
	fbeq.w		ftrapcc_trap		# equal?
ftrapcc_eq_no:
	rts					# do nothing
16988
16989#
16990# not equal:
16991#	_
16992#	Z
16993#
ftrapcc_neq:
# FTRAPNE: a true predicate goes straight to ftrapcc_trap; no BSUN
# bookkeeping is done here.
	fbneq.w		ftrapcc_trap		# not equal?
ftrapcc_neq_no:
	rts					# do nothing
16998
16999#
17000# greater than:
17001#	_______
17002#	NANvZvN
17003#
ftrapcc_gt:
# Non-aware "greater than": true goes to ftrapcc_trap. A false predicate
# with NAN set records BSUN and AIOP; an enabled BSUN goes to ftrapcc_bsun.
	fbgt.w		ftrapcc_trap		# greater than?
ftrapcc_gt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gt_done		# no
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_gt_done:
	rts					# no; do nothing
17014
17015#
17016# not greater than:
17017#
17018#	NANvZvN
17019#
ftrapcc_ngt:
# Non-aware "not greater than": false is a no-op. A true predicate takes
# the trap, unless NAN is set and BSUN is enabled, in which case it goes
# to ftrapcc_bsun. BSUN and AIOP are recorded whenever NAN is set.
	fbngt.w		ftrapcc_ngt_yes		# predicate true?
ftrapcc_ngt_no:
	rts					# false; nothing to do
ftrapcc_ngt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		ftrapcc_trap		# no; go take trap
	bra.w		ftrapcc_bsun		# yes; BSUN takes precedence
17031
17032#
17033# greater than or equal:
17034#	   _____
17035#	Zv(NANvN)
17036#
ftrapcc_ge:
# Non-aware "greater than or equal". On either outcome, NAN in the
# stacked cc records BSUN and AIOP, and an enabled BSUN goes to
# ftrapcc_bsun. Otherwise: true takes the trap, false is a no-op.
	fbge.w		ftrapcc_ge_yes		# predicate true?
ftrapcc_ge_no:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.b		ftrapcc_ge_done		# no; nothing to do
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_ge_done:
	rts
ftrapcc_ge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		ftrapcc_trap		# no; go take trap
	bra.w		ftrapcc_bsun		# yes; BSUN takes precedence
17054
17055#
17056# not (greater than or equal):
17057#	       _
17058#	NANv(N^Z)
17059#
ftrapcc_nge:
# Non-aware "not (greater than or equal)": false is a no-op. A true
# predicate takes the trap, unless NAN is set and BSUN is enabled, in
# which case it goes to ftrapcc_bsun. BSUN and AIOP are recorded whenever
# NAN is set.
	fbnge.w		ftrapcc_nge_yes		# predicate true?
ftrapcc_nge_no:
	rts					# false; nothing to do
ftrapcc_nge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN set in the stacked cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # record BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	beq.w		ftrapcc_trap		# no; go take trap
	bra.w		ftrapcc_bsun		# yes; BSUN takes precedence
17071
17072#
# less than:
#	   _____
#	N^(NANvZ)
#
# Predicate true: go to ftrapcc_trap.
# Predicate false: if NAN is set in the stacked cc byte, flag BSUN and
# AIOP in USER_FPSR and, if BSUN is enabled, exit via ftrapcc_bsun;
# otherwise simply return.
#
ftrapcc_lt:
	fblt.w		ftrapcc_trap		# less than?
ftrapcc_lt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_lt_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_lt_done:
	rts					# no; do nothing
17087
17088#
# not less than:
#	       _
#	NANv(ZvN)
#
# Predicate false: return without doing anything.
# Predicate true: if NAN is set, flag BSUN and AIOP in USER_FPSR first;
# an enabled BSUN exits via ftrapcc_bsun, otherwise the trap is taken.
#
ftrapcc_nlt:
	fbnlt.w		ftrapcc_nlt_yes		# not less than?
ftrapcc_nlt_no:
	rts					# do nothing
ftrapcc_nlt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17104
17105#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
# NAN is examined on both outcomes. When it is set, BSUN and AIOP are
# flagged in USER_FPSR and an enabled BSUN exits via ftrapcc_bsun.
# Otherwise: predicate true -> ftrapcc_trap, predicate false -> return.
# (Per the formula above, the predicate can be true with NAN set only
# when Z is set as well.)
#
ftrapcc_le:
	fble.w		ftrapcc_le_yes		# less than or equal?
ftrapcc_le_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_le_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_le_done:
	rts					# no; do nothing
ftrapcc_le_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17127
17128#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
# Predicate false: return without doing anything.
# Predicate true: if NAN is set, flag BSUN and AIOP in USER_FPSR first;
# an enabled BSUN exits via ftrapcc_bsun, otherwise the trap is taken.
#
ftrapcc_nle:
	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
ftrapcc_nle_no:
	rts					# do nothing
ftrapcc_nle_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17144
17145#
# greater or less than:
#	_____
#	NANvZ
#
# Predicate true: go to ftrapcc_trap.
# Predicate false: if NAN is set in the stacked cc byte, flag BSUN and
# AIOP in USER_FPSR and, if BSUN is enabled, exit via ftrapcc_bsun;
# otherwise simply return.
#
ftrapcc_gl:
	fbgl.w		ftrapcc_trap		# greater or less than?
ftrapcc_gl_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gl_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_gl_done:
	rts					# no; do nothing
17160
17161#
# not (greater or less than):
#
#	NANvZ
#
# Predicate false: return without doing anything.
# Predicate true: if NAN is set, flag BSUN and AIOP in USER_FPSR first;
# an enabled BSUN exits via ftrapcc_bsun, otherwise the trap is taken.
#
ftrapcc_ngl:
	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
ftrapcc_ngl_no:
	rts					# do nothing
ftrapcc_ngl_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17177
17178#
# greater, less, or equal:
#	___
#	NAN
#
# Predicate true: go to ftrapcc_trap.
# Predicate false: per the formula above this means NAN is set, so no
# separate NAN test is made; BSUN and AIOP are flagged in USER_FPSR and
# an enabled BSUN exits via ftrapcc_bsun. Otherwise simply return.
#
ftrapcc_gle:
	fbgle.w		ftrapcc_trap		# greater, less, or equal?
ftrapcc_gle_no:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	rts					# no; do nothing
17190
17191#
# not (greater, less, or equal):
#
#	NAN
#
# Predicate false: return without doing anything.
# Predicate true: per the formula above this means NAN is set, so no
# separate NAN test is made; BSUN and AIOP are flagged in USER_FPSR. An
# enabled BSUN exits via ftrapcc_bsun, otherwise the trap is taken.
#
ftrapcc_ngle:
	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
ftrapcc_ngle_no:
	rts					# do nothing
ftrapcc_ngle_yes:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17205
17206#########################################################################
17207#									#
17208# Miscellaneous tests							#
17209#									#
# F never traps and T always traps; neither looks at the condition	#
# codes. The signalling tests (SF, ST, SEQ, SNEQ) also check the NAN	#
# bit and, if it is set, set BSUN and AIOP.				#
17213#									#
17214#########################################################################
17215
17216#
# false:
#
#	False
#
# The predicate is never true, so no trap is flagged and the condition
# codes are not examined.
#
ftrapcc_f:
	rts					# do nothing
17223
17224#
# true:
#
#	True
#
# The predicate is always true, so the trap is flagged unconditionally;
# the condition codes are not examined.
#
ftrapcc_t:
	bra.w		ftrapcc_trap		# go take trap
17231
17232#
# signalling false:
#
#	False
#
# Never traps. If NAN is set in the stacked cc byte, BSUN and AIOP are
# flagged in USER_FPSR and an enabled BSUN exits via ftrapcc_bsun.
#
ftrapcc_sf:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.b		ftrapcc_sf_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_sf_done:
	rts					# no; do nothing
17245
17246#
# signalling true:
#
#	True
#
# Always traps unless an enabled BSUN takes priority: if NAN is set,
# BSUN and AIOP are flagged in USER_FPSR and an enabled BSUN exits via
# ftrapcc_bsun; in every other case control goes to ftrapcc_trap.
#
ftrapcc_st:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17258
17259#
# signalling equal:
#
#	Z
#
# NAN is examined on both outcomes. When it is set, BSUN and AIOP are
# flagged in USER_FPSR and an enabled BSUN exits via ftrapcc_bsun.
# Otherwise: predicate true -> ftrapcc_trap, predicate false -> return.
#
ftrapcc_seq:
	fbseq.w		ftrapcc_seq_yes		# signalling equal?
ftrapcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		ftrapcc_seq_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_seq_done:
	rts					# no; do nothing
ftrapcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17281
17282#
# signalling not equal:
#	_
#	Z
#
# NAN is examined on both outcomes. When it is set, BSUN and AIOP are
# flagged in USER_FPSR and an enabled BSUN exits via ftrapcc_bsun.
# Otherwise: predicate true -> ftrapcc_trap, predicate false -> return.
#
ftrapcc_sneq:
	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
ftrapcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		ftrapcc_sneq_no_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
ftrapcc_sneq_no_done:
	rts					# do nothing
ftrapcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes; flag bsun exit
	bra.w		ftrapcc_trap		# no; go take trap
17304
17305#########################################################################
17306#									#
17307# IEEE Aware tests							#
17308#									#
17309# For the IEEE aware tests, we only have to set the result based on the	#
17310# floating point condition codes. The BSUN exception will not be	#
17311# set for any of these tests.						#
17312#									#
17313#########################################################################
17314
17315#
# ordered greater than:
#	_______
#	NANvZvN
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ogt:
	fbogt.w		ftrapcc_trap		# ordered greater than?
ftrapcc_ogt_no:
	rts					# do nothing
17324
17325#
# unordered or less or equal:
#
#	NANvZvN
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ule:
	fbule.w		ftrapcc_trap		# unordered or less or equal?
ftrapcc_ule_no:
	rts					# do nothing
17334
17335#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_oge:
	fboge.w		ftrapcc_trap		# ordered greater than or equal?
ftrapcc_oge_no:
	rts					# do nothing
17344
17345#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ult:
	fbult.w		ftrapcc_trap		# unordered or less than?
ftrapcc_ult_no:
	rts					# do nothing
17354
17355#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_olt:
	fbolt.w		ftrapcc_trap		# ordered less than?
ftrapcc_olt_no:
	rts					# do nothing
17364
17365#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_uge:
	fbuge.w		ftrapcc_trap		# unordered or greater or equal?
ftrapcc_uge_no:
	rts					# do nothing
17374
17375#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ole:
	fbole.w		ftrapcc_trap		# ordered less than or equal?
ftrapcc_ole_no:
	rts					# do nothing
17384
17385#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ugt:
	fbugt.w		ftrapcc_trap		# unordered or greater than?
ftrapcc_ugt_no:
	rts					# do nothing
17394
17395#
# ordered greater or less than:
#	_____
#	NANvZ
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ogl:
	fbogl.w		ftrapcc_trap		# ordered greater or less than?
ftrapcc_ogl_no:
	rts					# do nothing
17404
17405#
# unordered or equal:
#
#	NANvZ
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# The NAN bit is not tested separately and BSUN is never flagged.
#
ftrapcc_ueq:
	fbueq.w		ftrapcc_trap		# unordered or equal?
ftrapcc_ueq_no:
	rts					# do nothing
17414
17415#
# ordered:
#	___
#	NAN
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# BSUN is never flagged.
#
ftrapcc_or:
	fbor.w		ftrapcc_trap		# ordered?
ftrapcc_or_no:
	rts					# do nothing
17424
17425#
# unordered:
#
#	NAN
#
# IEEE aware: go to ftrapcc_trap if the predicate is true, else return.
# BSUN is never flagged.
#
ftrapcc_un:
	fbun.w		ftrapcc_trap		# unordered?
ftrapcc_un_no:
	rts					# do nothing
17434
17435#######################################################################
17436
# the predicate is true and no enabled bsun exception has to be taken
# (bsun was either not flagged at all, or flagged but not enabled).
# we will need to jump to the ftrapcc vector. the stack frame
# is the same size as that of the fp unimp instruction. the
# only difference is that the <ea> field should hold the PC
# of the ftrapcc instruction and the vector offset field
# should denote the ftrapcc trap.
# this routine only records ftrapcc_flg in SPCOND_FLG and returns;
# no stack frame is modified here.
ftrapcc_trap:
	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)
	rts
17446
# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine only records fbsun_flg in SPCOND_FLG and returns.
ftrapcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts
17454
17455#########################################################################
17456# fscc(): routine to emulate the fscc instruction			#
17457#									#
17458# XDEF **************************************************************** #
17459#	_fscc()								#
17460#									#
17461# XREF **************************************************************** #
17462#	store_dreg_b() - store result to data register file		#
17463#	dec_areg() - decrement an areg for -(an) mode			#
17464#	inc_areg() - increment an areg for (an)+ mode			#
17465#	_dmem_write_byte() - store result to memory			#
17466#									#
17467# INPUT ***************************************************************	#
17468#	none								#
17469#									#
17470# OUTPUT ************************************************************** #
17471#	none								#
17472#									#
17473# ALGORITHM ***********************************************************	#
17474#	This routine checks which conditional predicate is specified by	#
17475# the stacked fscc instruction opcode and then branches to a routine	#
17476# for that predicate. The corresponding fbcc instruction is then used	#
17477# to see whether the condition (specified by the stacked FPSR) is true	#
17478# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged, then the result (true or false)	#
# is stored to the data register file or memory.			#
17484#									#
17485#########################################################################
17486
	global		_fscc
#
# _fscc(): dispatch on the fscc conditional predicate.
#
# The stacked condition code byte (FPSR_CC) is moved into the top byte
# of the real FPSR so that the fbcc instructions in the per-predicate
# routines evaluate the emulated instruction's condition codes.
# d0.w (from EXC_CMDREG) indexes tbl_fscc, a table of 32 word offsets
# relative to tbl_fscc, and control jumps to the selected routine.
# Each routine leaves its true/false result byte in d0 and finishes
# through fscc_done or fscc_chk_bsun.
#
# NOTE(review): d0 is used as the table index without masking, so the
# command word is presumably guaranteed to be in the range 0-31 by the
# time this routine is entered -- confirm against the caller.
#
_fscc:
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
	jmp		(tbl_fscc.b,%pc,%d1.w) 	# jump to fscc routine

# one word offset per predicate; the index is the predicate number.
tbl_fscc:
	short		fscc_f		-	tbl_fscc	# 00
	short		fscc_eq		-	tbl_fscc	# 01
	short		fscc_ogt	-	tbl_fscc	# 02
	short		fscc_oge	-	tbl_fscc	# 03
	short		fscc_olt	-	tbl_fscc	# 04
	short		fscc_ole	-	tbl_fscc	# 05
	short		fscc_ogl	-	tbl_fscc	# 06
	short		fscc_or		-	tbl_fscc	# 07
	short		fscc_un		-	tbl_fscc	# 08
	short		fscc_ueq	-	tbl_fscc	# 09
	short		fscc_ugt	-	tbl_fscc	# 10
	short		fscc_uge	-	tbl_fscc	# 11
	short		fscc_ult	-	tbl_fscc	# 12
	short		fscc_ule	-	tbl_fscc	# 13
	short		fscc_neq	-	tbl_fscc	# 14
	short		fscc_t		-	tbl_fscc	# 15
	short		fscc_sf		-	tbl_fscc	# 16
	short		fscc_seq	-	tbl_fscc	# 17
	short		fscc_gt		-	tbl_fscc	# 18
	short		fscc_ge		-	tbl_fscc	# 19
	short		fscc_lt		-	tbl_fscc	# 20
	short		fscc_le		-	tbl_fscc	# 21
	short		fscc_gl		-	tbl_fscc	# 22
	short		fscc_gle	-	tbl_fscc	# 23
	short		fscc_ngle	-	tbl_fscc	# 24
	short		fscc_ngl	-	tbl_fscc	# 25
	short		fscc_nle	-	tbl_fscc	# 26
	short		fscc_nlt	-	tbl_fscc	# 27
	short		fscc_nge	-	tbl_fscc	# 28
	short		fscc_ngt	-	tbl_fscc	# 29
	short		fscc_sneq	-	tbl_fscc	# 30
	short		fscc_st		-	tbl_fscc	# 31
17532
17533#########################################################################
17534#									#
17535# IEEE Nonaware tests							#
17536#									#
17537# For the IEEE nonaware tests, we set the result based on the		#
17538# floating point condition codes. In addition, we check to see		#
17539# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
17540#									#
17541# The cases EQ and NE are shared by the Aware and Nonaware groups	#
17542# and are incapable of setting the BSUN exception bit.			#
17543#									#
17544# Typically, only one of the two possible branch directions could	#
17545# have the NAN bit set.							#
17546#									#
17547#########################################################################
17548
17549#
# equal:
#
#	Z
#
# Sets the result byte in d0 (true via st, false via clr.b) and goes to
# fscc_done. NAN is not examined; BSUN is never flagged.
#
fscc_eq:
	fbeq.w		fscc_eq_yes		# equal?
fscc_eq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_eq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17562
17563#
# not equal:
#	_
#	Z
#
# Sets the result byte in d0 (true via st, false via clr.b) and goes to
# fscc_done. NAN is not examined; BSUN is never flagged.
#
fscc_neq:
	fbneq.w		fscc_neq_yes		# not equal?
fscc_neq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_neq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17576
17577#
# greater than:
#	_______
#	NANvZvN
#
# Predicate true: d0 = true, go store it (fscc_done).
# Predicate false: d0 = false; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_gt:
	fbgt.w		fscc_gt_yes		# greater than?
fscc_gt_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_gt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17593
17594#
# not greater than:
#
#	NANvZvN
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: d0 = true; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_ngt:
	fbngt.w		fscc_ngt_yes		# not greater than?
fscc_ngt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngt_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17610
17611#
# greater than or equal:
#	   _____
#	Zv(NANvN)
#
# d0 receives the true/false result. NAN is examined on both outcomes:
# when it is set, BSUN and AIOP are flagged in USER_FPSR and
# fscc_chk_bsun decides whether the store happens; when it is clear the
# result goes straight to fscc_done.
#
fscc_ge:
	fbge.w		fscc_ge_yes		# greater than or equal?
fscc_ge_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_ge_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17630
17631#
# not (greater than or equal):
#	       _
#	NANv(N^Z)
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: d0 = true; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_nge:
	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
fscc_nge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nge_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17647
17648#
# less than:
#	   _____
#	N^(NANvZ)
#
# Predicate true: d0 = true, go store it (fscc_done).
# Predicate false: d0 = false; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_lt:
	fblt.w		fscc_lt_yes		# less than?
fscc_lt_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_lt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17664
17665#
# not less than:
#	       _
#	NANv(ZvN)
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: d0 = true; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_nlt:
	fbnlt.w		fscc_nlt_yes		# not less than?
fscc_nlt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nlt_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17681
17682#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
# d0 receives the true/false result. NAN is examined on both outcomes:
# when it is set, BSUN and AIOP are flagged in USER_FPSR and
# fscc_chk_bsun decides whether the store happens; when it is clear the
# result goes straight to fscc_done.
#
fscc_le:
	fble.w		fscc_le_yes		# less than or equal?
fscc_le_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_le_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17701
17702#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: d0 = true; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_nle:
	fbnle.w		fscc_nle_yes		# not (less than or equal)?
fscc_nle_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nle_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17718
17719#
# greater or less than:
#	_____
#	NANvZ
#
# Predicate true: d0 = true, go store it (fscc_done).
# Predicate false: d0 = false; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_gl:
	fbgl.w		fscc_gl_yes		# greater or less than?
fscc_gl_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_gl_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17735
17736#
# not (greater or less than):
#
#	NANvZ
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: d0 = true; if NAN is set, flag BSUN and AIOP in
# USER_FPSR and let fscc_chk_bsun decide whether the store happens.
#
fscc_ngl:
	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
fscc_ngl_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngl_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17752
17753#
# greater, less, or equal:
#	___
#	NAN
#
# Predicate true: d0 = true, go store it (fscc_done).
# Predicate false: per the formula above this means NAN is set, so no
# separate NAN test is made; d0 = false, BSUN and AIOP are flagged in
# USER_FPSR and fscc_chk_bsun decides whether the store happens.
#
fscc_gle:
	fbgle.w		fscc_gle_yes		# greater, less, or equal?
fscc_gle_no:
	clr.b		%d0			# set false
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_gle_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17767
17768#
# not (greater, less, or equal):
#
#	NAN
#
# Predicate false: d0 = false, go store it (fscc_done).
# Predicate true: per the formula above this means NAN is set, so no
# separate NAN test is made; d0 = true, BSUN and AIOP are flagged in
# USER_FPSR and fscc_chk_bsun decides whether the store happens.
#
fscc_ngle:
	fbngle.w		fscc_ngle_yes	# not (greater, less, or equal)?
fscc_ngle_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngle_yes:
	st		%d0			# set true
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17782
17783#########################################################################
17784#									#
17785# Miscellaneous tests							#
17786#									#
# F yields false and T yields true; neither looks at the condition	#
# codes. The signalling tests (SF, ST, SEQ, SNEQ) also check the NAN	#
# bit and, if it is set, set BSUN and AIOP.				#
17790#									#
17791#########################################################################
17792
17793#
# false:
#
#	False
#
# The result is always false; the condition codes are not examined.
#
fscc_f:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
17801
17802#
# true:
#
#	True
#
# The result is always true; the condition codes are not examined.
#
fscc_t:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17810
17811#
# signalling false:
#
#	False
#
# The result is always false. If NAN is set, BSUN and AIOP are flagged
# in USER_FPSR and fscc_chk_bsun decides whether the store happens.
#
fscc_sf:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17822
17823#
# signalling true:
#
#	True
#
# The result is always true. If NAN is set, BSUN and AIOP are flagged
# in USER_FPSR and fscc_chk_bsun decides whether the store happens.
#
fscc_st:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17834
17835#
# signalling equal:
#
#	Z
#
# d0 receives the true/false result. NAN is examined on both outcomes:
# when it is set, BSUN and AIOP are flagged in USER_FPSR and
# fscc_chk_bsun decides whether the store happens; when it is clear the
# result goes straight to fscc_done.
#
fscc_seq:
	fbseq.w		fscc_seq_yes		# signalling equal?
fscc_seq_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_seq_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17854
17855#
# signalling not equal:
#	_
#	Z
#
# d0 receives the true/false result. NAN is examined on both outcomes:
# when it is set, BSUN and AIOP are flagged in USER_FPSR and
# fscc_chk_bsun decides whether the store happens; when it is clear the
# result goes straight to fscc_done.
#
fscc_sneq:
	fbsneq.w	fscc_sneq_yes		# signalling not equal?
fscc_sneq_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
fscc_sneq_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
	bra.w		fscc_chk_bsun		# go check BSUN enable
17874
17875#########################################################################
17876#									#
17877# IEEE Aware tests							#
17878#									#
17879# For the IEEE aware tests, we only have to set the result based on the	#
17880# floating point condition codes. The BSUN exception will not be	#
17881# set for any of these tests.						#
17882#									#
17883#########################################################################
17884
17885#
# ordered greater than:
#	_______
#	NANvZvN
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ogt:
	fbogt.w		fscc_ogt_yes		# ordered greater than?
fscc_ogt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ogt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17898
17899#
# unordered or less or equal:
#
#	NANvZvN
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ule:
	fbule.w		fscc_ule_yes		# unordered or less or equal?
fscc_ule_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ule_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17912
17913#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_oge:
	fboge.w		fscc_oge_yes		# ordered greater than or equal?
fscc_oge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_oge_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17926
17927#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ult:
	fbult.w		fscc_ult_yes		# unordered or less than?
fscc_ult_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ult_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17940
17941#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_olt:
	fbolt.w		fscc_olt_yes		# ordered less than?
fscc_olt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_olt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17954
17955#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_uge:
	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
fscc_uge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_uge_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17968
17969#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ole:
	fbole.w		fscc_ole_yes		# ordered less than or equal?
fscc_ole_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ole_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17982
17983#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ugt:
	fbugt.w		fscc_ugt_yes		# unordered or greater than?
fscc_ugt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ugt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
17996
17997#
# ordered greater or less than:
#	_____
#	NANvZ
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ogl:
	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
fscc_ogl_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ogl_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
18010
18011#
# unordered or equal:
#
#	NANvZ
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_ueq:
	fbueq.w		fscc_ueq_yes		# unordered or equal?
fscc_ueq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ueq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
18024
18025#
# ordered:
#	___
#	NAN
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_or:
	fbor.w		fscc_or_yes		# ordered?
fscc_or_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_or_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
18038
18039#
# unordered:
#
#	NAN
#
# IEEE aware: d0 = true or false per the predicate, then fscc_done.
# BSUN is never flagged.
#
fscc_un:
	fbun.w		fscc_un_yes		# unordered?
fscc_un_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_un_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish
18052
18053#######################################################################
18054
18055#
# the bsun exception bit was set. now, check to see if BSUN
# is enabled. if so, don't store result and correct stack frame
# for a bsun exception.
# if BSUN is not enabled, execution falls through into fscc_done,
# which stores the result already selected in d0.
#
fscc_chk_bsun:
	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fscc_bsun		# yes; skip the store
18063
18064#
# no enabled bsun exception has to be taken (bsun was either not
# flagged, or flagged but not enabled).
# the result has been selected (byte in d0).
# now, check to see if the result is to be stored in the data register
# file or in memory.
# the <ea> mode field (bits 5-3 of the low opword byte) is zero for a
# data register destination; any other mode is handled by fscc_mem_op,
# which is entered with the result in a0 and the masked mode in d1.
#
fscc_done:
	mov.l		%d0,%a0			# save result for a moment

	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	mov.l		%d1,%d0			# make a copy
	andi.b		&0x38,%d1		# extract <ea> mode

	bne.b		fscc_mem_op		# it's a memory operation

	mov.l		%d0,%d1			# restore unmasked lo opword
	andi.w		&0x7,%d1		# pass index in d1
	mov.l		%a0,%d0			# pass result in d0
	bsr.l		store_dreg_b		# save result in regfile
	rts
18084
18085#
# the stacked <ea> is correct with the exception of:
# 	-> Dn : <ea> is garbage
#
# if the addressing mode is post-increment or pre-decrement,
# then the address registers have not been updated.
#
# on entry: a0 = result byte, d1.b = <ea> mode field (opword & 0x38).
# (An)+ and -(An) are handed to fscc_mem_inc/fscc_mem_dec; every other
# memory mode just writes the byte to EXC_EA. a non-zero d1 after
# _dmem_write_byte is treated as a failed write (fscc_err).
#
fscc_mem_op:
	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
	beq.b		fscc_mem_inc		# yes
	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
	beq.b		fscc_mem_dec		# yes

	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	rts
18106
# addressing mode is post-increment. write the result byte. if the write
# fails then don't update the address register. if write passes then
# call inc_areg() to update the address register.
# on entry a0 holds the result byte; the register number is re-read
# from the low three bits of the opword after the write.
fscc_mem_inc:
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to inc by
	bsr.l		inc_areg		# increment address register

	rts
18124
# addressing mode is pre-decrement. write the result byte. if the write
# fails then don't update the address register. if the write passes then
# call dec_areg() to update the address register.
# on entry a0 holds the result byte; the register number is re-read
# from the low three bits of the opword after the write.
fscc_mem_dec:
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to dec by
	bsr.l		dec_areg		# decrement address register

	rts
18142
# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine only records fbsun_flg in SPCOND_FLG and returns; the
# fscc result is not stored.
fscc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts
18150
# the byte write to memory has failed. record 0x00a1 in the EXC_VOFF
# field of the frame and leave through facc_finish.
# NOTE(review): the original comment said the failing effective address
# and an FSLW are passed to funimp_dacc(); the code here branches to
# facc_finish, which presumably performs that hand-off -- confirm.
fscc_err:
	mov.w		&0x00a1,EXC_VOFF(%a6)
	bra.l		facc_finish
18156
18157#########################################################################
18158# XDEF ****************************************************************	#
18159#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
18160#									#
18161# XREF ****************************************************************	#
18162#	fetch_dreg() - fetch data register				#
18163#	{i,d,}mem_read() - fetch data from memory			#
18164#	_mem_write() - write data to memory				#
18165#	iea_iacc() - instruction memory access error occurred		#
18166#	iea_dacc() - data memory access error occurred			#
18167#	restore() - restore An index regs if access error occurred	#
18168#									#
18169# INPUT ***************************************************************	#
18170#	None								#
18171# 									#
18172# OUTPUT **************************************************************	#
#	If instr is "fmovm.x Dn,-(a7)" from supervisor mode,		#
#		d0 = size of dump (in bytes)				#
#		d1 = reg select string from Dn (bit-reversed)		#
18176#	Else if instruction access error,				#
18177#		d0 = FSLW						#
18178#	Else if data access error,					#
18179#		d0 = FSLW						#
18180#		a0 = address of fault					#
18181#	Else								#
18182#		none.							#
18183#									#
18184# ALGORITHM ***********************************************************	#
18185#	The effective address must be calculated since this is entered	#
18186# from an "Unimplemented Effective Address" exception handler. So, we	#
18187# have our own fcalc_ea() routine here. If an access error is flagged	#
18188# by a _{i,d,}mem_read() call, we must exit through the special		#
18189# handler.								#
18190#	The data register is determined and its value loaded to get the	#
18191# string of FP registers affected. This value is used as an index into	#
18192# a lookup table such that we can determine the number of bytes		#
18193# involved. 								#
18194#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
18195# to read in all FP values. Again, _mem_read() may fail and require a	#
18196# special exit. 							#
#	If the instruction is "fmovm.x Dn,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
# 	If the instruction is "fmovm.x Dn,-(a7)" from supervisor mode,	#
18200# then we return the size of the dump and the string to the caller	#
18201# so that the move can occur outside of this routine. This special	#
18202# case is required so that moves to the system stack are handled	#
18203# correctly.								#
18204#									#
18205# DYNAMIC:								#
18206# 	fmovm.x	dn, <ea>						#
18207# 	fmovm.x	<ea>, dn						#
18208#									#
18209#	      <WORD 1>		      <WORD2>				#
18210#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
18211#					  				#
18212#	& = (0): predecrement addressing mode				#
18213#	    (1): postincrement or control addressing mode		#
18214#	@ = (0): move listed regs from memory to the FPU		#
18215#	    (1): move listed regs from the FPU to memory		#
18216#	$$$    : index of data register holding reg select mask		#
18217#									#
18218# NOTES:								#
18219#	If the data register holds a zero, then the			#
18220#	instruction is a nop.						#
18221#									#
18222#########################################################################
18223
	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
# (the register number is taken from bits 6-4 of the extension word's
# low byte.)
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch lo byte of extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

# the string and the dump size are parked on the stack across the call
# to fmovm_calc_ea(), which takes the size in d0 and overwrites d0/d1.
	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes to move
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
# (the Z bit tested here was set by the "restore strg" move just above.)
	beq.w		fmovm_data_done

# separate move ins from move outs...
# (btst on memory is a byte operation, so this tests bit 5 of the
# extension word's high byte: clear = memory to FPU, set = FPU to memory.)
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# it's a move in
18251
18252#############
18253# MOVE OUT: #
18254#############
18255fmovm_data_out:
18256	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
18257	bne.w		fmovm_out_ctrl		# control
18258
18259############################
18260fmovm_out_predec:
18261# for predecrement mode, the bit string is the opposite of both control
18262# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18263# here, we convert it to be just like the others...
18264	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
18265
18266	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
18267	beq.b		fmovm_out_ctrl		# user
18268
18269fmovm_out_predec_s:
18270	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18271	bne.b		fmovm_out_ctrl
18272
18273# the operation was unfortunately an: fmovm.x dn,-(sp)
18274# called from supervisor mode.
18275# we're also passing "size" and "strg" back to the calling routine
18276	rts
18277
18278############################
18279fmovm_out_ctrl:
18280	mov.l		%a0,%a1			# move <ea> to a1
18281
18282	sub.l		%d0,%sp			# subtract size of dump
18283	lea		(%sp),%a0
18284
18285	tst.b		%d1			# should FP0 be moved?
18286	bpl.b		fmovm_out_ctrl_fp1	# no
18287
18288	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
18289	mov.l		0x4+EXC_FP0(%a6),(%a0)+
18290	mov.l		0x8+EXC_FP0(%a6),(%a0)+
18291
18292fmovm_out_ctrl_fp1:
18293	lsl.b		&0x1,%d1		# should FP1 be moved?
18294	bpl.b		fmovm_out_ctrl_fp2	# no
18295
18296	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
18297	mov.l		0x4+EXC_FP1(%a6),(%a0)+
18298	mov.l		0x8+EXC_FP1(%a6),(%a0)+
18299
18300fmovm_out_ctrl_fp2:
18301	lsl.b		&0x1,%d1		# should FP2 be moved?
18302	bpl.b		fmovm_out_ctrl_fp3	# no
18303
18304	fmovm.x		&0x20,(%a0)		# yes
18305	add.l		&0xc,%a0
18306
18307fmovm_out_ctrl_fp3:
18308	lsl.b		&0x1,%d1		# should FP3 be moved?
18309	bpl.b		fmovm_out_ctrl_fp4	# no
18310
18311	fmovm.x		&0x10,(%a0)		# yes
18312	add.l		&0xc,%a0
18313
18314fmovm_out_ctrl_fp4:
18315	lsl.b		&0x1,%d1		# should FP4 be moved?
18316	bpl.b		fmovm_out_ctrl_fp5	# no
18317
18318	fmovm.x		&0x08,(%a0)		# yes
18319	add.l		&0xc,%a0
18320
18321fmovm_out_ctrl_fp5:
18322	lsl.b		&0x1,%d1		# should FP5 be moved?
18323	bpl.b		fmovm_out_ctrl_fp6	# no
18324
18325	fmovm.x		&0x04,(%a0)		# yes
18326	add.l		&0xc,%a0
18327
18328fmovm_out_ctrl_fp6:
18329	lsl.b		&0x1,%d1		# should FP6 be moved?
18330	bpl.b		fmovm_out_ctrl_fp7	# no
18331
18332	fmovm.x		&0x02,(%a0)		# yes
18333	add.l		&0xc,%a0
18334
18335fmovm_out_ctrl_fp7:
18336	lsl.b		&0x1,%d1		# should FP7 be moved?
18337	bpl.b		fmovm_out_ctrl_done	# no
18338
18339	fmovm.x		&0x01,(%a0)		# yes
18340	add.l		&0xc,%a0
18341
18342fmovm_out_ctrl_done:
18343	mov.l		%a1,L_SCR1(%a6)
18344
18345	lea		(%sp),%a0		# pass: supervisor src
18346	mov.l		%d0,-(%sp)		# save size
18347	bsr.l		_dmem_write		# copy data to user mem
18348
18349	mov.l		(%sp)+,%d0
18350	add.l		%d0,%sp			# clear fpreg data from stack
18351
18352	tst.l		%d1			# did dstore err?
18353	bne.w		fmovm_out_err		# yes
18354
18355	rts
18356
18357############
18358# MOVE IN: #
18359############
18360fmovm_data_in:
18361	mov.l		%a0,L_SCR1(%a6)
18362
18363	sub.l		%d0,%sp			# make room for fpregs
18364	lea		(%sp),%a1
18365
18366	mov.l		%d1,-(%sp)		# save bit string for later
18367	mov.l		%d0,-(%sp)		# save # of bytes
18368
18369	bsr.l		_dmem_read		# copy data from user mem
18370
18371	mov.l		(%sp)+,%d0		# retrieve # of bytes
18372
18373	tst.l		%d1			# did dfetch fail?
18374	bne.w		fmovm_in_err		# yes
18375
18376	mov.l		(%sp)+,%d1		# load bit string
18377
18378	lea		(%sp),%a0		# addr of stack
18379
18380	tst.b		%d1			# should FP0 be moved?
18381	bpl.b		fmovm_data_in_fp1	# no
18382
18383	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
18384	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
18385	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
18386
18387fmovm_data_in_fp1:
18388	lsl.b		&0x1,%d1		# should FP1 be moved?
18389	bpl.b		fmovm_data_in_fp2	# no
18390
18391	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
18392	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
18393	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
18394
18395fmovm_data_in_fp2:
18396	lsl.b		&0x1,%d1		# should FP2 be moved?
18397	bpl.b		fmovm_data_in_fp3	# no
18398
18399	fmovm.x		(%a0)+,&0x20		# yes
18400
18401fmovm_data_in_fp3:
18402	lsl.b		&0x1,%d1		# should FP3 be moved?
18403	bpl.b		fmovm_data_in_fp4	# no
18404
18405	fmovm.x		(%a0)+,&0x10		# yes
18406
18407fmovm_data_in_fp4:
18408	lsl.b		&0x1,%d1		# should FP4 be moved?
18409	bpl.b		fmovm_data_in_fp5	# no
18410
18411	fmovm.x		(%a0)+,&0x08		# yes
18412
18413fmovm_data_in_fp5:
18414	lsl.b		&0x1,%d1		# should FP5 be moved?
18415	bpl.b		fmovm_data_in_fp6	# no
18416
18417	fmovm.x		(%a0)+,&0x04		# yes
18418
18419fmovm_data_in_fp6:
18420	lsl.b		&0x1,%d1		# should FP6 be moved?
18421	bpl.b		fmovm_data_in_fp7	# no
18422
18423	fmovm.x		(%a0)+,&0x02		# yes
18424
18425fmovm_data_in_fp7:
18426	lsl.b		&0x1,%d1		# should FP7 be moved?
18427	bpl.b		fmovm_data_in_done	# no
18428
18429	fmovm.x		(%a0)+,&0x01		# yes
18430
18431fmovm_data_in_done:
18432	add.l		%d0,%sp			# remove fpregs from stack
18433	rts
18434
18435#####################################
18436
18437fmovm_data_done:
18438	rts
18439
18440##############################################################################
18441
18442#
18443# table indexed by the operation's bit string that gives the number
18444# of bytes that will be moved.
18445#
18446# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18447#
18448tbl_fmovm_size:
18449	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18450	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18451	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18452	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18454	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18455	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18456	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18457	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18458	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18460	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18461	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18462	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18464	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18465	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18466	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18467	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18468	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18470	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18471	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18472	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18473	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18474	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18475	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18476	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18477	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18478	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18479	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18480	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
18481
18482#
18483# table to convert a pre-decrement bit string into a post-increment
18484# or control bit string.
18485# ex: 	0x00	==>	0x00
18486#	0x01	==>	0x80
18487#	0x02	==>	0x40
18488#		.
18489#		.
18490#	0xfd	==>	0xbf
18491#	0xfe	==>	0x7f
18492#	0xff	==>	0xff
18493#
18494tbl_fmovm_convert:
18495	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18496	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18497	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18498	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18499	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18500	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18501	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18502	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18503	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18504	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18505	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18506	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18507	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18508	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18509	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18510	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18511	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18512	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18513	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18514	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18515	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18516	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18517	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18518	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18519	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18520	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18521	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18522	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18523	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18524	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18525	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18526	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
18527
	global		fmovm_calc_ea
###############################################
# _fmovm_calc_ea: calculate effective address #
###############################################
# in:	d0 = # of bytes the instruction will move. it is handed to the
#	     per-mode handler in a0; of the handlers below only the
#	     (An)+ and -(An) ones use it, to update An.
# out:	a0 = <ea>, set by the handler that is jumped to.
# d0 and d1 are overwritten. the handlers are entered with jmp, so their
# rts returns directly to the caller of fmovm_calc_ea().
# d1 = register field (0-7) on entry to the handler; faddr_ind_ext()
# depends on it.
fmovm_calc_ea:
	mov.l		%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w		%d0,%d1			# make a copy

	andi.w		&0x3f,%d0		# extract mode field (6-bit mode:reg)
	andi.l		&0x7,%d1		# extract reg  field

# jump to the corresponding function for each {MODE,REG} pair.
# the table holds 16-bit offsets relative to tbl_fea_mode.
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18546
# dispatch table for fmovm_calc_ea(): 64 word entries indexed by the low
# six bits of the opword (mode * 8 + reg). each entry is the distance from
# tbl_fea_mode to the handler. entries written as "tbl_fea_mode -
# tbl_fea_mode" are zero, i.e. they have no handler of their own.
# NOTE(review): a zero entry would make fmovm_calc_ea() jump to the table
# itself, so those {mode,reg} pairs are presumably never dispatched
# through here -- confirm against the callers.
	swbeg		&64
tbl_fea_mode:
# mode 0, reg 0-7: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 1, reg 0-7: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 2: (An)
	short		faddr_ind_a0	- 	tbl_fea_mode
	short		faddr_ind_a1	- 	tbl_fea_mode
	short		faddr_ind_a2	- 	tbl_fea_mode
	short		faddr_ind_a3 	- 	tbl_fea_mode
	short		faddr_ind_a4 	- 	tbl_fea_mode
	short		faddr_ind_a5 	- 	tbl_fea_mode
	short		faddr_ind_a6 	- 	tbl_fea_mode
	short		faddr_ind_a7 	- 	tbl_fea_mode

# mode 3: (An)+
	short		faddr_ind_p_a0	- 	tbl_fea_mode
	short		faddr_ind_p_a1 	- 	tbl_fea_mode
	short		faddr_ind_p_a2 	- 	tbl_fea_mode
	short		faddr_ind_p_a3 	- 	tbl_fea_mode
	short		faddr_ind_p_a4 	- 	tbl_fea_mode
	short		faddr_ind_p_a5 	- 	tbl_fea_mode
	short		faddr_ind_p_a6 	- 	tbl_fea_mode
	short		faddr_ind_p_a7 	- 	tbl_fea_mode

# mode 4: -(An)
	short		faddr_ind_m_a0 	- 	tbl_fea_mode
	short		faddr_ind_m_a1 	- 	tbl_fea_mode
	short		faddr_ind_m_a2 	- 	tbl_fea_mode
	short		faddr_ind_m_a3 	- 	tbl_fea_mode
	short		faddr_ind_m_a4 	- 	tbl_fea_mode
	short		faddr_ind_m_a5 	- 	tbl_fea_mode
	short		faddr_ind_m_a6 	- 	tbl_fea_mode
	short		faddr_ind_m_a7 	- 	tbl_fea_mode

# mode 5: (d16, An)
	short		faddr_ind_disp_a0	- 	tbl_fea_mode
	short		faddr_ind_disp_a1 	- 	tbl_fea_mode
	short		faddr_ind_disp_a2 	- 	tbl_fea_mode
	short		faddr_ind_disp_a3 	- 	tbl_fea_mode
	short		faddr_ind_disp_a4 	- 	tbl_fea_mode
	short		faddr_ind_disp_a5 	- 	tbl_fea_mode
	short		faddr_ind_disp_a6 	- 	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

# mode 6: indexed / memory indirect with An base (one handler for all An)
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode
	short		faddr_ind_ext 	- 	tbl_fea_mode

# mode 7: reg 0-3 = absolute short, absolute long, (d16,PC), PC indexed;
# reg 4-7: no handler
	short		fabs_short	- 	tbl_fea_mode
	short		fabs_long	- 	tbl_fea_mode
	short		fpc_ind		- 	tbl_fea_mode
	short		fpc_ind_ext	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
	short		tbl_fea_mode	- 	tbl_fea_mode
18620
18621###################################
18622# Address register indirect: (An) #
18623###################################
18624faddr_ind_a0:
18625	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
18626	rts
18627
18628faddr_ind_a1:
18629	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
18630	rts
18631
18632faddr_ind_a2:
18633	mov.l		%a2,%a0			# Get current a2
18634	rts
18635
18636faddr_ind_a3:
18637	mov.l		%a3,%a0			# Get current a3
18638	rts
18639
18640faddr_ind_a4:
18641	mov.l		%a4,%a0			# Get current a4
18642	rts
18643
18644faddr_ind_a5:
18645	mov.l		%a5,%a0			# Get current a5
18646	rts
18647
18648faddr_ind_a6:
18649	mov.l		(%a6),%a0		# Get current a6
18650	rts
18651
18652faddr_ind_a7:
18653	mov.l		EXC_A7(%a6),%a0		# Get current a7
18654	rts
18655
18656#####################################################
18657# Address register indirect w/ postincrement: (An)+ #
18658#####################################################
18659faddr_ind_p_a0:
18660	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
18661	mov.l		%d0,%d1
18662	add.l		%a0,%d1			# Increment
18663	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
18664	mov.l		%d0,%a0
18665	rts
18666
18667faddr_ind_p_a1:
18668	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
18669	mov.l		%d0,%d1
18670	add.l		%a0,%d1			# Increment
18671	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
18672	mov.l		%d0,%a0
18673	rts
18674
18675faddr_ind_p_a2:
18676	mov.l		%a2,%d0			# Get current a2
18677	mov.l		%d0,%d1
18678	add.l		%a0,%d1			# Increment
18679	mov.l		%d1,%a2			# Save incr value
18680	mov.l		%d0,%a0
18681	rts
18682
18683faddr_ind_p_a3:
18684	mov.l		%a3,%d0			# Get current a3
18685	mov.l		%d0,%d1
18686	add.l		%a0,%d1			# Increment
18687	mov.l		%d1,%a3			# Save incr value
18688	mov.l		%d0,%a0
18689	rts
18690
18691faddr_ind_p_a4:
18692	mov.l		%a4,%d0			# Get current a4
18693	mov.l		%d0,%d1
18694	add.l		%a0,%d1			# Increment
18695	mov.l		%d1,%a4			# Save incr value
18696	mov.l		%d0,%a0
18697	rts
18698
18699faddr_ind_p_a5:
18700	mov.l		%a5,%d0			# Get current a5
18701	mov.l		%d0,%d1
18702	add.l		%a0,%d1			# Increment
18703	mov.l		%d1,%a5			# Save incr value
18704	mov.l		%d0,%a0
18705	rts
18706
18707faddr_ind_p_a6:
18708	mov.l		(%a6),%d0		# Get current a6
18709	mov.l		%d0,%d1
18710	add.l		%a0,%d1			# Increment
18711	mov.l		%d1,(%a6)		# Save incr value
18712	mov.l		%d0,%a0
18713	rts
18714
18715faddr_ind_p_a7:
18716	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18717
18718	mov.l		EXC_A7(%a6),%d0		# Get current a7
18719	mov.l		%d0,%d1
18720	add.l		%a0,%d1			# Increment
18721	mov.l		%d1,EXC_A7(%a6)		# Save incr value
18722	mov.l		%d0,%a0
18723	rts
18724
18725####################################################
18726# Address register indirect w/ predecrement: -(An) #
18727####################################################
18728faddr_ind_m_a0:
18729	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
18730	sub.l		%a0,%d0			# Decrement
18731	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
18732	mov.l		%d0,%a0
18733	rts
18734
18735faddr_ind_m_a1:
18736	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
18737	sub.l		%a0,%d0			# Decrement
18738	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
18739	mov.l		%d0,%a0
18740	rts
18741
18742faddr_ind_m_a2:
18743	mov.l		%a2,%d0			# Get current a2
18744	sub.l		%a0,%d0			# Decrement
18745	mov.l		%d0,%a2			# Save decr value
18746	mov.l		%d0,%a0
18747	rts
18748
18749faddr_ind_m_a3:
18750	mov.l		%a3,%d0			# Get current a3
18751	sub.l		%a0,%d0			# Decrement
18752	mov.l		%d0,%a3			# Save decr value
18753	mov.l		%d0,%a0
18754	rts
18755
18756faddr_ind_m_a4:
18757	mov.l		%a4,%d0			# Get current a4
18758	sub.l		%a0,%d0			# Decrement
18759	mov.l		%d0,%a4			# Save decr value
18760	mov.l		%d0,%a0
18761	rts
18762
18763faddr_ind_m_a5:
18764	mov.l		%a5,%d0			# Get current a5
18765	sub.l		%a0,%d0			# Decrement
18766	mov.l		%d0,%a5			# Save decr value
18767	mov.l		%d0,%a0
18768	rts
18769
18770faddr_ind_m_a6:
18771	mov.l		(%a6),%d0		# Get current a6
18772	sub.l		%a0,%d0			# Decrement
18773	mov.l		%d0,(%a6)		# Save decr value
18774	mov.l		%d0,%a0
18775	rts
18776
18777faddr_ind_m_a7:
18778	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18779
18780	mov.l		EXC_A7(%a6),%d0		# Get current a7
18781	sub.l		%a0,%d0			# Decrement
18782	mov.l		%d0,EXC_A7(%a6)		# Save decr value
18783	mov.l		%d0,%a0
18784	rts
18785
18786########################################################
18787# Address register indirect w/ displacement: (d16, An) #
18788########################################################
18789faddr_ind_disp_a0:
18790	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18791	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18792	bsr.l		_imem_read_word
18793
18794	tst.l		%d1			# did ifetch fail?
18795	bne.l		iea_iacc		# yes
18796
18797	mov.w		%d0,%a0			# sign extend displacement
18798
18799	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
18800	rts
18801
18802faddr_ind_disp_a1:
18803	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18804	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18805	bsr.l		_imem_read_word
18806
18807	tst.l		%d1			# did ifetch fail?
18808	bne.l		iea_iacc		# yes
18809
18810	mov.w		%d0,%a0			# sign extend displacement
18811
18812	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
18813	rts
18814
18815faddr_ind_disp_a2:
18816	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18817	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18818	bsr.l		_imem_read_word
18819
18820	tst.l		%d1			# did ifetch fail?
18821	bne.l		iea_iacc		# yes
18822
18823	mov.w		%d0,%a0			# sign extend displacement
18824
18825	add.l		%a2,%a0			# a2 + d16
18826	rts
18827
18828faddr_ind_disp_a3:
18829	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18830	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18831	bsr.l		_imem_read_word
18832
18833	tst.l		%d1			# did ifetch fail?
18834	bne.l		iea_iacc		# yes
18835
18836	mov.w		%d0,%a0			# sign extend displacement
18837
18838	add.l		%a3,%a0			# a3 + d16
18839	rts
18840
18841faddr_ind_disp_a4:
18842	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18843	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18844	bsr.l		_imem_read_word
18845
18846	tst.l		%d1			# did ifetch fail?
18847	bne.l		iea_iacc		# yes
18848
18849	mov.w		%d0,%a0			# sign extend displacement
18850
18851	add.l		%a4,%a0			# a4 + d16
18852	rts
18853
18854faddr_ind_disp_a5:
18855	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18856	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18857	bsr.l		_imem_read_word
18858
18859	tst.l		%d1			# did ifetch fail?
18860	bne.l		iea_iacc		# yes
18861
18862	mov.w		%d0,%a0			# sign extend displacement
18863
18864	add.l		%a5,%a0			# a5 + d16
18865	rts
18866
18867faddr_ind_disp_a6:
18868	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18869	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18870	bsr.l		_imem_read_word
18871
18872	tst.l		%d1			# did ifetch fail?
18873	bne.l		iea_iacc		# yes
18874
18875	mov.w		%d0,%a0			# sign extend displacement
18876
18877	add.l		(%a6),%a0		# a6 + d16
18878	rts
18879
18880faddr_ind_disp_a7:
18881	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18882	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18883	bsr.l		_imem_read_word
18884
18885	tst.l		%d1			# did ifetch fail?
18886	bne.l		iea_iacc		# yes
18887
18888	mov.w		%d0,%a0			# sign extend displacement
18889
18890	add.l		EXC_A7(%a6),%a0		# a7 + d16
18891	rts
18892
18893########################################################################
18894# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18895#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
18896# Memory indirect postindexed: ([bd, An], Xn, od)		       #
18897# Memory indirect preindexed: ([bd, An, Xn], od)		       #
18898########################################################################
18899faddr_ind_ext:
18900	addq.l		&0x8,%d1
18901	bsr.l		fetch_dreg		# fetch base areg
18902	mov.l		%d0,-(%sp)
18903
18904	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18905	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18906	bsr.l		_imem_read_word		# fetch extword in d0
18907
18908	tst.l		%d1			# did ifetch fail?
18909	bne.l		iea_iacc		# yes
18910
18911	mov.l		(%sp)+,%a0
18912
18913	btst		&0x8,%d0
18914	bne.w		fcalc_mem_ind
18915
18916	mov.l		%d0,L_SCR1(%a6)		# hold opword
18917
18918	mov.l		%d0,%d1
18919	rol.w		&0x4,%d1
18920	andi.w		&0xf,%d1		# extract index regno
18921
18922# count on fetch_dreg() not to alter a0...
18923	bsr.l		fetch_dreg		# fetch index
18924
18925	mov.l		%d2,-(%sp)		# save d2
18926	mov.l		L_SCR1(%a6),%d2		# fetch opword
18927
18928	btst		&0xb,%d2		# is it word or long?
18929	bne.b		faii8_long
18930	ext.l		%d0			# sign extend word index
18931faii8_long:
18932	mov.l		%d2,%d1
18933	rol.w		&0x7,%d1
18934	andi.l		&0x3,%d1		# extract scale value
18935
18936	lsl.l		%d1,%d0			# shift index by scale
18937
18938	extb.l		%d2			# sign extend displacement
18939	add.l		%d2,%d0			# index + disp
18940	add.l		%d0,%a0			# An + (index + disp)
18941
18942	mov.l		(%sp)+,%d2		# restore old d2
18943	rts
18944
18945###########################
18946# Absolute short: (XXX).W #
18947###########################
18948fabs_short:
18949	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18950	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18951	bsr.l		_imem_read_word		# fetch short address
18952
18953	tst.l		%d1			# did ifetch fail?
18954	bne.l		iea_iacc		# yes
18955
18956	mov.w		%d0,%a0			# return <ea> in a0
18957	rts
18958
18959##########################
18960# Absolute long: (XXX).L #
18961##########################
18962fabs_long:
18963	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18964	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
18965	bsr.l		_imem_read_long		# fetch long address
18966
18967	tst.l		%d1			# did ifetch fail?
18968	bne.l		iea_iacc		# yes
18969
18970	mov.l		%d0,%a0			# return <ea> in a0
18971	rts
18972
18973#######################################################
18974# Program counter indirect w/ displacement: (d16, PC) #
18975#######################################################
18976fpc_ind:
18977	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
18978	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
18979	bsr.l		_imem_read_word		# fetch word displacement
18980
18981	tst.l		%d1			# did ifetch fail?
18982	bne.l		iea_iacc		# yes
18983
18984	mov.w		%d0,%a0			# sign extend displacement
18985
18986	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
18987
18988# _imem_read_word() increased the extwptr by 2. need to adjust here.
18989	subq.l		&0x2,%a0		# adjust <ea>
18990	rts
18991
18992##########################################################
18993# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
18994# "     "     w/   "  (base displacement): (bd, PC, An)  #
18995# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
18996# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
18997##########################################################
18998fpc_ind_ext:
18999	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19000	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
19001	bsr.l		_imem_read_word		# fetch ext word
19002
19003	tst.l		%d1			# did ifetch fail?
19004	bne.l		iea_iacc		# yes
19005
19006	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
19007	subq.l		&0x2,%a0		# adjust base
19008
19009	btst		&0x8,%d0		# is disp only 8 bits?
19010	bne.w		fcalc_mem_ind		# calc memory indirect
19011
19012	mov.l		%d0,L_SCR1(%a6)		# store opword
19013
19014	mov.l		%d0,%d1			# make extword copy
19015	rol.w		&0x4,%d1		# rotate reg num into place
19016	andi.w		&0xf,%d1		# extract register number
19017
19018# count on fetch_dreg() not to alter a0...
19019	bsr.l		fetch_dreg		# fetch index
19020
19021	mov.l		%d2,-(%sp)		# save d2
19022	mov.l		L_SCR1(%a6),%d2		# fetch opword
19023
19024	btst		&0xb,%d2		# is index word or long?
19025	bne.b		fpii8_long		# long
19026	ext.l		%d0			# sign extend word index
19027fpii8_long:
19028	mov.l		%d2,%d1
19029	rol.w		&0x7,%d1		# rotate scale value into place
19030	andi.l		&0x3,%d1		# extract scale value
19031
19032	lsl.l		%d1,%d0			# shift index by scale
19033
19034	extb.l		%d2			# sign extend displacement
19035	add.l		%d2,%d0			# disp + index
19036	add.l		%d0,%a0			# An + (index + disp)
19037
19038	mov.l		(%sp)+,%d2		# restore temp register
19039	rts
19040
# full-format extension word handling, shared by faddr_ind_ext() and
# fpc_ind_ext().
# in:	d0 = extension word, a0 = base (An or PC base)
# out:	a0 = <ea>; d2-d5 are saved on entry and restored on every exit.
# working registers:
# d2 = index
# d3 = base
# d4 = od
# d5 = extword
# extension word fields as decoded below:
#	bits 15-12 = index register number (passed to fetch_dreg())
#	bit  11    = index size (0 = sign-extended word, 1 = long)
#	bits 10-9  = scale
#	bit  7     = base suppress (d3 is cleared)
#	bit  6     = index suppress (d2 is cleared)
#	bits 5-4   = bd size: 0,1 = none added, 2 = word, 3 = long
#	bit  2     = 0: preindexed, 1: postindexed (memory indirect only)
#	bits 1-0   = od: 0 = no memory indirection, 1 = null od,
#		     2 = word od, 3 = long od
# instruction fetch failures leave through fcea_iacc, a failing read of
# the indirect pointer leaves through fcea_err.
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
# (fetch_dreg() is called before d2-d5 are saved; a0 is copied to d3 only
# afterwards, so this also counts on fetch_dreg() not to alter a0.)
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5		# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2			# sign extend word index

fno_ext:
	bfextu		%d5{&21:&2},%d0		# extract scale value
	lsl.l		%d0,%d2			# shift index by scale

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the base suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b	 	%d0,&0x2
	blt.b		fno_bd			# size 0 or 1: nothing to add
	beq.b		fget_word_bd		# size 2: word bd

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# size 3: long bd

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?
	beq.w		faii_bd			# field == 0: no memory indirect

	cmpi.b	 	%d0,&0x2
	blt.b		fnull_od		# field == 1: null od
	beq.b		fword_od		# field == 2: word od

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# field == 3: long od

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b 		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4			# d4 = od

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

# postindexed: <ea> = mem[base + bd] + index + od
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

# preindexed: <ea> = mem[base + bd + index] + od
fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

# no memory indirection: <ea> = base + bd + index
faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0			# return <ea> in a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts
19181
19182#########################################################
19183fcea_err:
19184	mov.l		%d3,%a0
19185
19186	movm.l		(%sp)+,&0x003c		# restore d2-d5
19187	mov.w		&0x0101,%d0
19188	bra.l		iea_dacc
19189
19190fcea_iacc:
19191	movm.l		(%sp)+,&0x003c		# restore d2-d5
19192	bra.l		iea_iacc
19193
# Error exits that funnel into iea_dacc(). Both entry points first call
# restore(), then load a 16-bit code into d0 and the longword saved in
# L_SCR1(a6) into a0 before leaving through iea_dacc().
# NOTE(review): by analogy with fcea_err above (a0 = address, d0 = code
# word), a0 is presumably the faulting address and d0 the fault status
# bits expected by iea_dacc() - confirm against iea_dacc() and the code
# that writes L_SCR1.
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0		# code word for the "out" case
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0		# code word for the "in" case

fmovm_err:
	mov.l		L_SCR1(%a6),%a0		# a0 = value saved in L_SCR1
	bra.l		iea_dacc
19206
19207#########################################################################
19208# XDEF ****************************************************************	#
19209# 	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
19210#									#
19211# XREF ****************************************************************	#
19212#	_imem_read_long() - read longword from memory			#
19213#	iea_iacc() - _imem_read_long() failed; error recovery		#
19214#									#
19215# INPUT ***************************************************************	#
19216#	None								#
19217# 									#
19218# OUTPUT **************************************************************	#
19219#	If _imem_read_long() doesn't fail:				#
19220#		USER_FPCR(a6)  = new FPCR value				#
19221#		USER_FPSR(a6)  = new FPSR value				#
19222#		USER_FPIAR(a6) = new FPIAR value			#
19223#									#
19224# ALGORITHM ***********************************************************	#
19225# 	Decode the instruction type by looking at the extension word 	#
19226# in order to see how many control registers to fetch from memory.	#
19227# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
19228# the special access error exit handler iea_iacc().			#
19229#									#
19230# Instruction word decoding:						#
19231#									#
19232# 	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
19233#									#
19234#		WORD1			WORD2				#
19235#	1111 0010 00 111100	100$ $$00 0000 0000			#
19236#									#
19237#	$$$ (100): FPCR							#
19238#	    (010): FPSR							#
19239#	    (001): FPIAR						#
19240#	    (000): FPIAR						#
19241#									#
19242#########################################################################
19243
19244	global		fmovm_ctrl
19245fmovm_ctrl:
19246	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
19247	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
19248	beq.w		fctrl_in_7		# yes
19249	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
19250	beq.w		fctrl_in_6		# yes
19251	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
19252	beq.b		fctrl_in_5		# yes
19253
19254# fmovem.l #<data>, fpsr/fpiar
19255fctrl_in_3:
19256	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19257	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19258	bsr.l		_imem_read_long		# fetch FPSR from mem
19259
19260	tst.l		%d1			# did ifetch fail?
19261	bne.l		iea_iacc		# yes
19262
19263	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
19264	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19265	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19266	bsr.l		_imem_read_long		# fetch FPIAR from mem
19267
19268	tst.l		%d1			# did ifetch fail?
19269	bne.l		iea_iacc		# yes
19270
19271	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
19272	rts
19273
19274# fmovem.l #<data>, fpcr/fpiar
19275fctrl_in_5:
19276	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19277	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19278	bsr.l		_imem_read_long		# fetch FPCR from mem
19279
19280	tst.l		%d1			# did ifetch fail?
19281	bne.l		iea_iacc		# yes
19282
19283	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
19284	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19285	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19286	bsr.l		_imem_read_long		# fetch FPIAR from mem
19287
19288	tst.l		%d1			# did ifetch fail?
19289	bne.l		iea_iacc		# yes
19290
19291	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
19292	rts
19293
19294# fmovem.l #<data>, fpcr/fpsr
19295fctrl_in_6:
19296	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19297	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19298	bsr.l		_imem_read_long		# fetch FPCR from mem
19299
19300	tst.l		%d1			# did ifetch fail?
19301	bne.l		iea_iacc		# yes
19302
19303	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
19304	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19305	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19306	bsr.l		_imem_read_long		# fetch FPSR from mem
19307
19308	tst.l		%d1			# did ifetch fail?
19309	bne.l		iea_iacc		# yes
19310
19311	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
19312	rts
19313
19314# fmovem.l #<data>, fpcr/fpsr/fpiar
19315fctrl_in_7:
19316	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19317	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19318	bsr.l		_imem_read_long		# fetch FPCR from mem
19319
19320	tst.l		%d1			# did ifetch fail?
19321	bne.l		iea_iacc		# yes
19322
19323	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
19324	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19325	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19326	bsr.l		_imem_read_long		# fetch FPSR from mem
19327
19328	tst.l		%d1			# did ifetch fail?
19329	bne.l		iea_iacc		# yes
19330
19331	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
19332	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
19333	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
19334	bsr.l		_imem_read_long		# fetch FPIAR from mem
19335
19336	tst.l		%d1			# did ifetch fail?
19337	bne.l		iea_iacc		# yes
19338
19339	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
19340	rts
19341
19342#########################################################################
19343# XDEF ****************************************************************	#
19344#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
19345#									#
19346# XREF ****************************************************************	#
19347#	inc_areg() - increment an address register			#
19348#	dec_areg() - decrement an address register			#
19349#									#
19350# INPUT ***************************************************************	#
19351#	d0 = number of bytes to adjust <ea> by				#
19352# 									#
19353# OUTPUT **************************************************************	#
19354#	None								#
19355#									#
19356# ALGORITHM ***********************************************************	#
19357# "Dummy" CALCulate Effective Address:					#
19358# 	The stacked <ea> for FP unimplemented instructions and opclass	#
19359#	two packed instructions is correct with the exception of...	#
19360#									#
19361#	1) -(An)   : The register is not updated regardless of size.	#
19362#		     Also, for extended precision and packed, the 	#
19363#		     stacked <ea> value is 8 bytes too big		#
19364#	2) (An)+   : The register is not updated.			#
19365#	3) #<data> : The upper longword of the immediate operand is 	#
19366#		     stacked b,w,l and s sizes are completely stacked. 	#
19367#		     d,x, and p are not.				#
19368#									#
19369#########################################################################
19370
19371	global		_dcalc_ea
19372_dcalc_ea:
19373	mov.l		%d0, %a0		# move # bytes to %a0
19374
19375	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
19376	mov.l		%d0, %d1		# make a copy
19377
19378	andi.w		&0x38, %d0		# extract mode field
19379	andi.l		&0x7, %d1		# extract reg  field
19380
19381	cmpi.b		%d0,&0x18		# is mode (An)+ ?
19382	beq.b		dcea_pi			# yes
19383
19384	cmpi.b		%d0,&0x20		# is mode -(An) ?
19385	beq.b		dcea_pd			# yes
19386
19387	or.w		%d1,%d0			# concat mode,reg
19388	cmpi.b		%d0,&0x3c		# is mode #<data>?
19389
19390	beq.b		dcea_imm		# yes
19391
19392	mov.l		EXC_EA(%a6),%a0		# return <ea>
19393	rts
19394
19395# need to set immediate data flag here since we'll need to do
19396# an imem_read to fetch this later.
19397dcea_imm:
19398	mov.b		&immed_flg,SPCOND_FLG(%a6)
19399	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
19400	rts
19401
19402# here, the <ea> is stacked correctly. however, we must update the
19403# address register...
19404dcea_pi:
19405	mov.l		%a0,%d0			# pass amt to inc by
19406	bsr.l		inc_areg		# inc addr register
19407
19408	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
19409	rts
19410
19411# the <ea> is stacked correctly for all but extended and packed which
19412# the <ea>s are 8 bytes too large.
19413# it would make no sense to have a pre-decrement to a7 in supervisor
19414# mode so we don't even worry about this tricky case here : )
19415dcea_pd:
19416	mov.l		%a0,%d0			# pass amt to dec by
19417	bsr.l		dec_areg		# dec addr register
19418
19419	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
19420
19421	cmpi.b		%d0,&0xc		# is opsize ext or packed?
19422	beq.b		dcea_pd2		# yes
19423	rts
19424dcea_pd2:
19425	sub.l		&0x8,%a0		# correct <ea>
19426	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
19427	rts
19428
19429#########################################################################
19430# XDEF ****************************************************************	#
19431# 	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
19432#			 and packed data opclass 3 operations.		#
19433#									#
19434# XREF ****************************************************************	#
19435#	None								#
19436#									#
19437# INPUT ***************************************************************	#
19438#	None								#
19439# 									#
19440# OUTPUT **************************************************************	#
19441#	a0 = return correct effective address				#
19442#									#
19443# ALGORITHM ***********************************************************	#
19444#	For opclass 3 extended and packed data operations, the <ea>	#
19445# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
19446# modes. Also, while we're at it, the index register itself must get 	#
19447# updated.								#
19448# 	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
19449# and return that value as the correct <ea> and store that value in An.	#
19450# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
19451#									#
19452#########################################################################
19453
19454# This calc_ea is currently used to retrieve the correct <ea>
19455# for fmove outs of type extended and packed.
19456	global		_calc_ea_fout
19457_calc_ea_fout:
19458	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
19459	mov.l		%d0,%d1			# make a copy
19460
19461	andi.w		&0x38,%d0		# extract mode field
19462	andi.l		&0x7,%d1		# extract reg  field
19463
19464	cmpi.b		%d0,&0x18		# is mode (An)+ ?
19465	beq.b		ceaf_pi			# yes
19466
19467	cmpi.b		%d0,&0x20		# is mode -(An) ?
19468	beq.w		ceaf_pd			# yes
19469
19470	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
19471	rts
19472
19473# (An)+ : extended and packed fmove out
19474#	: stacked <ea> is correct
19475#	: "An" not updated
19476ceaf_pi:
19477	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
19478	mov.l		EXC_EA(%a6),%a0
19479	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)
19480
19481	swbeg		&0x8
19482tbl_ceaf_pi:
19483	short		ceaf_pi0 - tbl_ceaf_pi
19484	short		ceaf_pi1 - tbl_ceaf_pi
19485	short		ceaf_pi2 - tbl_ceaf_pi
19486	short		ceaf_pi3 - tbl_ceaf_pi
19487	short		ceaf_pi4 - tbl_ceaf_pi
19488	short		ceaf_pi5 - tbl_ceaf_pi
19489	short		ceaf_pi6 - tbl_ceaf_pi
19490	short		ceaf_pi7 - tbl_ceaf_pi
19491
19492ceaf_pi0:
19493	addi.l		&0xc,EXC_DREGS+0x8(%a6)
19494	rts
19495ceaf_pi1:
19496	addi.l		&0xc,EXC_DREGS+0xc(%a6)
19497	rts
19498ceaf_pi2:
19499	add.l		&0xc,%a2
19500	rts
19501ceaf_pi3:
19502	add.l		&0xc,%a3
19503	rts
19504ceaf_pi4:
19505	add.l		&0xc,%a4
19506	rts
19507ceaf_pi5:
19508	add.l		&0xc,%a5
19509	rts
19510ceaf_pi6:
19511	addi.l		&0xc,EXC_A6(%a6)
19512	rts
19513ceaf_pi7:
19514	mov.b		&mia7_flg,SPCOND_FLG(%a6)
19515	addi.l		&0xc,EXC_A7(%a6)
19516	rts
19517
19518# -(An) : extended and packed fmove out
19519#	: stacked <ea> = actual <ea> + 8
19520#	: "An" not updated
19521ceaf_pd:
19522	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
19523	mov.l		EXC_EA(%a6),%a0
19524	sub.l		&0x8,%a0
19525	sub.l		&0x8,EXC_EA(%a6)
19526	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)
19527
19528	swbeg		&0x8
19529tbl_ceaf_pd:
19530	short		ceaf_pd0 - tbl_ceaf_pd
19531	short		ceaf_pd1 - tbl_ceaf_pd
19532	short		ceaf_pd2 - tbl_ceaf_pd
19533	short		ceaf_pd3 - tbl_ceaf_pd
19534	short		ceaf_pd4 - tbl_ceaf_pd
19535	short		ceaf_pd5 - tbl_ceaf_pd
19536	short		ceaf_pd6 - tbl_ceaf_pd
19537	short		ceaf_pd7 - tbl_ceaf_pd
19538
19539ceaf_pd0:
19540	mov.l		%a0,EXC_DREGS+0x8(%a6)
19541	rts
19542ceaf_pd1:
19543	mov.l		%a0,EXC_DREGS+0xc(%a6)
19544	rts
19545ceaf_pd2:
19546	mov.l		%a0,%a2
19547	rts
19548ceaf_pd3:
19549	mov.l		%a0,%a3
19550	rts
19551ceaf_pd4:
19552	mov.l		%a0,%a4
19553	rts
19554ceaf_pd5:
19555	mov.l		%a0,%a5
19556	rts
19557ceaf_pd6:
19558	mov.l		%a0,EXC_A6(%a6)
19559	rts
19560ceaf_pd7:
19561	mov.l		%a0,EXC_A7(%a6)
19562	mov.b		&mda7_flg,SPCOND_FLG(%a6)
19563	rts
19564
19565#########################################################################
19566# XDEF ****************************************************************	#
19567#	_load_fop(): load operand for unimplemented FP exception	#
19568#									#
19569# XREF ****************************************************************	#
19570#	set_tag_x() - determine ext prec optype tag			#
19571#	set_tag_s() - determine sgl prec optype tag			#
19572#	set_tag_d() - determine dbl prec optype tag			#
#	unnorm_fix() - convert unnormalized number to denorm or zero	#
19574#	norm() - normalize a denormalized number			#
19575#	get_packed() - fetch a packed operand from memory		#
19576#	_dcalc_ea() - calculate <ea>, fixing An in process		#
19577#									#
19578#	_imem_read_{word,long}() - read from instruction memory		#
19579#	_dmem_read() - read from data memory				#
19580#	_dmem_read_{byte,word,long}() - read from data memory		#
19581#									#
19582#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
19583#									#
19584# INPUT ***************************************************************	#
19585#	None								#
19586# 									#
19587# OUTPUT **************************************************************	#
19588#	If memory access doesn't fail:					#
19589#		FP_SRC(a6) = source operand in extended precision	#
19590# 		FP_DST(a6) = destination operand in extended precision	#
19591#									#
19592# ALGORITHM ***********************************************************	#
19593# 	This is called from the Unimplemented FP exception handler in	#
19594# order to load the source and maybe destination operand into		#
19595# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
19596# the source and destination from the FP register file. Set the optype	#
19597# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
19598# convert it to a DENORM or a ZERO.					#
19599# 	If the instruction is opclass two (memory->reg), then fetch	#
19600# the destination from the register file and the source operand from 	#
19601# memory. Tag and fix both as above w/ opclass zero instructions.	#
19602# 	If the source operand is byte,word,long, or single, it may be	#
19603# in the data register file. If it's actually out in memory, use one of	#
19604# the mem_read() routines to fetch it. If the mem_read() access returns	#
19605# a failing value, exit through the special facc_in() routine which	#
# will create an access error exception frame from the current exception#
19607# frame.								#
19608# 	Immediate data and regular data accesses are separated because 	#
19609# if an immediate data access fails, the resulting fault status		#
19610# longword stacked for the access error exception must have the 	#
19611# instruction bit set.							#
19612#									#
19613#########################################################################
19614
	global		_load_fop
_load_fop:

#  15     13 12 10  9 7  6       0
# /        \ /   \ /  \ /         \
# ---------------------------------
# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
# ---------------------------------
#

#	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
#	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
#	beq.w		op010			# handle <ea> -> fpn
#	bgt.w		op011			# handle fpn -> <ea>

# we're not using op011 for now...
# btst on memory is a byte operation: bit 6 of the high byte of the
# command word is bit 14 of the word, i.e. the middle opclass bit.
	btst		&0x6,EXC_CMDREG(%a6)
	bne.b		op010			# set => opclass '010

############################
# OPCLASS '000: reg -> reg #
############################
# Decide from the extension field whether a destination operand is
# needed: bit 5 clear => source only; bits 5:4 = 10 => source and
# destination; bits 5:4 = 11 => destination only for 0x38 (fcmp).
op000:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
	btst		&0x5,%d0		# testing extension bits
	beq.b		op000_src		# (bit 5 == 0) => monadic
	btst		&0x4,%d0		# (bit 5 == 1)
	beq.b		op000_dst		# (bit 4 == 0) => dyadic
	and.w		&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b		op000_src		# no; only the source is loaded
						# yes; fall through for the dst

op000_dst:
	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST

	bsr.l		set_tag_x		# get dst optype tag

	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b		op000_dst_unnorm	# yes
op000_dst_cont:
	mov.b 		%d0, DTAG(%a6)		# store the dst optype tag

op000_src:
	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC

	bsr.l		set_tag_x		# get src optype tag

	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
	beq.b		op000_src_unnorm	# yes
op000_src_cont:
	mov.b		%d0, STAG(%a6)		# store the src optype tag
	rts

# out-of-line UNNORM fixups; d0 on return from unnorm_fix is stored
# as the operand's tag by the *_cont code.
op000_dst_unnorm:
	bsr.l		unnorm_fix		# fix the dst UNNORM
	bra.b		op000_dst_cont
op000_src_unnorm:
	bsr.l		unnorm_fix		# fix the src UNNORM
	bra.b		op000_src_cont
19676
19677#############################
19678# OPCLASS '010: <ea> -> reg #
19679#############################
19680op010:
19681	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
19682	btst		&0x5,%d0		# testing extension bits
19683	beq.b		op010_src		# (bit 5 == 0) => monadic
19684	btst		&0x4,%d0		# (bit 5 == 1)
19685	beq.b		op010_dst		# (bit 4 == 0) => dyadic
19686	and.w		&0x007f,%d0		# extract extension bits {6:0}
19687	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
19688	bne.b		op010_src		# it's an fcmp
19689
19690op010_dst:
19691	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19692	bsr.l		load_fpn2		# fetch dst fpreg ptr
19693
19694	bsr.l		set_tag_x		# get dst type tag
19695
19696	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
19697	beq.b		op010_dst_unnorm	# yes
19698op010_dst_cont:
19699	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
19700
19701op010_src:
19702	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19703
19704	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19705	bne.w		fetch_from_mem		# src op is in memory
19706
19707op010_dreg:
19708	clr.b		STAG(%a6)		# either NORM or ZERO
19709	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19710
19711	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19712	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19713
19714op010_dst_unnorm:
19715	bsr.l		unnorm_fix		# fix the dst UNNORM
19716	bra.b		op010_dst_cont
19717
19718	swbeg		&0x8
19719tbl_op010_dreg:
19720	short		opd_long	- tbl_op010_dreg
19721	short		opd_sgl 	- tbl_op010_dreg
19722	short		tbl_op010_dreg	- tbl_op010_dreg
19723	short		tbl_op010_dreg	- tbl_op010_dreg
19724	short		opd_word	- tbl_op010_dreg
19725	short		tbl_op010_dreg	- tbl_op010_dreg
19726	short		opd_byte	- tbl_op010_dreg
19727	short		tbl_op010_dreg	- tbl_op010_dreg
19728
19729#
19730# LONG: can be either NORM or ZERO...
19731#
19732opd_long:
19733	bsr.l		fetch_dreg		# fetch long in d0
19734	fmov.l		%d0, %fp0 		# load a long
19735	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19736	fbeq.w		opd_long_zero		# long is a ZERO
19737	rts
19738opd_long_zero:
19739	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19740	rts
19741
19742#
19743# WORD: can be either NORM or ZERO...
19744#
19745opd_word:
19746	bsr.l		fetch_dreg		# fetch word in d0
19747	fmov.w		%d0, %fp0 		# load a word
19748	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19749	fbeq.w		opd_word_zero		# WORD is a ZERO
19750	rts
19751opd_word_zero:
19752	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19753	rts
19754
19755#
19756# BYTE: can be either NORM or ZERO...
19757#
19758opd_byte:
19759	bsr.l		fetch_dreg		# fetch word in d0
19760	fmov.b		%d0, %fp0 		# load a byte
19761	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19762	fbeq.w		opd_byte_zero		# byte is a ZERO
19763	rts
19764opd_byte_zero:
19765	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
19766	rts
19767
19768#
19769# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19770#
19771# separate SNANs and DENORMs so they can be loaded w/ special care.
19772# all others can simply be moved "in" using fmove.
19773#
19774opd_sgl:
19775	bsr.l		fetch_dreg		# fetch sgl in d0
19776	mov.l		%d0,L_SCR1(%a6)
19777
19778	lea		L_SCR1(%a6), %a0 	# pass: ptr to the sgl
19779	bsr.l		set_tag_s		# determine sgl type
19780	mov.b		%d0, STAG(%a6)		# save the src tag
19781
19782	cmpi.b		%d0, &SNAN		# is it an SNAN?
19783	beq.w		get_sgl_snan		# yes
19784
19785	cmpi.b		%d0, &DENORM		# is it a DENORM?
19786	beq.w		get_sgl_denorm		# yes
19787
19788	fmov.s		(%a0), %fp0		# no, so can load it regular
19789	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19790	rts
19791
19792##############################################################################
19793
19794#########################################################################
19795# fetch_from_mem():							#
19796# - src is out in memory. must:						#
19797#	(1) calc ea - must read AFTER you know the src type since	#
19798#		      if the ea is -() or ()+, need to know # of bytes.	#
19799#	(2) read it in from either user or supervisor space		#
19800#	(3) if (b || w || l) then simply read in			#
19801#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
19802#	    if (packed) then punt for now				#
19803# INPUT:								#
19804#	%d0 : src type field						#
19805#########################################################################
19806fetch_from_mem:
19807	clr.b		STAG(%a6)		# either NORM or ZERO
19808
19809	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19810	jmp		(tbl_fp_type.b,%pc,%d0.w*1)
19811
19812	swbeg		&0x8
19813tbl_fp_type:
19814	short		load_long	- tbl_fp_type
19815	short		load_sgl	- tbl_fp_type
19816	short		load_ext	- tbl_fp_type
19817	short		load_packed	- tbl_fp_type
19818	short		load_word	- tbl_fp_type
19819	short		load_dbl	- tbl_fp_type
19820	short		load_byte	- tbl_fp_type
19821	short		tbl_fp_type	- tbl_fp_type
19822
19823#########################################
19824# load a LONG into %fp0:		#
19825# 	-number can't fault		#
19826#	(1) calc ea			#
19827#	(2) read 4 bytes into L_SCR1	#
19828#	(3) fmov.l into %fp0		#
19829#########################################
19830load_long:
19831	movq.l		&0x4, %d0		# pass: 4 (bytes)
19832	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19833
19834	cmpi.b		SPCOND_FLG(%a6),&immed_flg
19835	beq.b		load_long_immed
19836
19837	bsr.l		_dmem_read_long		# fetch src operand from memory
19838
19839	tst.l		%d1			# did dfetch fail?
19840	bne.l		facc_in_l		# yes
19841
19842load_long_cont:
19843	fmov.l		%d0, %fp0		# read into %fp0;convert to xprec
19844	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19845
19846	fbeq.w		load_long_zero		# src op is a ZERO
19847	rts
19848load_long_zero:
19849	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19850	rts
19851
19852load_long_immed:
19853	bsr.l		_imem_read_long		# fetch src operand immed data
19854
19855	tst.l		%d1			# did ifetch fail?
19856	bne.l		funimp_iacc		# yes
19857	bra.b		load_long_cont
19858
19859#########################################
19860# load a WORD into %fp0:		#
19861# 	-number can't fault		#
19862#	(1) calc ea			#
19863#	(2) read 2 bytes into L_SCR1	#
19864#	(3) fmov.w into %fp0		#
19865#########################################
19866load_word:
19867	movq.l		&0x2, %d0		# pass: 2 (bytes)
19868	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19869
19870	cmpi.b		SPCOND_FLG(%a6),&immed_flg
19871	beq.b		load_word_immed
19872
19873	bsr.l		_dmem_read_word		# fetch src operand from memory
19874
19875	tst.l		%d1			# did dfetch fail?
19876	bne.l		facc_in_w		# yes
19877
19878load_word_cont:
19879	fmov.w		%d0, %fp0		# read into %fp0;convert to xprec
19880	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19881
19882	fbeq.w		load_word_zero		# src op is a ZERO
19883	rts
19884load_word_zero:
19885	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19886	rts
19887
19888load_word_immed:
19889	bsr.l		_imem_read_word		# fetch src operand immed data
19890
19891	tst.l		%d1			# did ifetch fail?
19892	bne.l		funimp_iacc		# yes
19893	bra.b		load_word_cont
19894
19895#########################################
19896# load a BYTE into %fp0:		#
19897# 	-number can't fault		#
19898#	(1) calc ea			#
19899#	(2) read 1 byte into L_SCR1	#
19900#	(3) fmov.b into %fp0		#
19901#########################################
19902load_byte:
19903	movq.l		&0x1, %d0		# pass: 1 (byte)
19904	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19905
19906	cmpi.b		SPCOND_FLG(%a6),&immed_flg
19907	beq.b		load_byte_immed
19908
19909	bsr.l		_dmem_read_byte		# fetch src operand from memory
19910
19911	tst.l		%d1			# did dfetch fail?
19912	bne.l		facc_in_b		# yes
19913
19914load_byte_cont:
19915	fmov.b		%d0, %fp0		# read into %fp0;convert to xprec
19916	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19917
19918	fbeq.w		load_byte_zero		# src op is a ZERO
19919	rts
19920load_byte_zero:
19921	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
19922	rts
19923
19924load_byte_immed:
19925	bsr.l		_imem_read_word		# fetch src operand immed data
19926
19927	tst.l		%d1			# did ifetch fail?
19928	bne.l		funimp_iacc		# yes
19929	bra.b		load_byte_cont
19930
19931#########################################
19932# load a SGL into %fp0:			#
19933# 	-number can't fault		#
19934#	(1) calc ea			#
19935#	(2) read 4 bytes into L_SCR1	#
19936#	(3) fmov.s into %fp0		#
19937#########################################
19938load_sgl:
19939	movq.l		&0x4, %d0		# pass: 4 (bytes)
19940	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
19941
19942	cmpi.b		SPCOND_FLG(%a6),&immed_flg
19943	beq.b		load_sgl_immed
19944
19945	bsr.l		_dmem_read_long		# fetch src operand from memory
19946	mov.l		%d0, L_SCR1(%a6)	# store src op on stack
19947
19948	tst.l		%d1			# did dfetch fail?
19949	bne.l		facc_in_l		# yes
19950
19951load_sgl_cont:
19952	lea		L_SCR1(%a6), %a0	# pass: ptr to sgl src op
19953	bsr.l		set_tag_s		# determine src type tag
19954	mov.b		%d0, STAG(%a6)		# save src optype tag on stack
19955
19956	cmpi.b		%d0, &DENORM		# is it a sgl DENORM?
19957	beq.w		get_sgl_denorm		# yes
19958
19959	cmpi.b		%d0, &SNAN		# is it a sgl SNAN?
19960	beq.w		get_sgl_snan		# yes
19961
19962	fmov.s		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
19963	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
19964	rts
19965
19966load_sgl_immed:
19967	bsr.l		_imem_read_long		# fetch src operand immed data
19968
19969	tst.l		%d1			# did ifetch fail?
19970	bne.l		funimp_iacc		# yes
19971	bra.b		load_sgl_cont
19972
# must convert sgl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : points to sgl denorm
#
# Builds {sgn,exp=0 | 23-bit fraction in bits 30..8 of hi | lo=0} in
# FP_SRC, normalizes it with norm(), then ORs in the exponent
# 0x3f81 - (shift count returned by norm() in d0). The tag is
# rewritten to NORM.
get_sgl_denorm:
	clr.w		FP_SRC_EX(%a6)		# {sgn,exp} = 0; sign is set below
	bfextu		(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
	lsl.l		&0x8, %d0		# left-justify below the j-bit
	mov.l		%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)

	btst		&0x7, (%a0)		# is sgn bit set?
	beq.b		sgl_dnrm_norm
	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

sgl_dnrm_norm:
	lea		FP_SRC(%a6), %a0
	bsr.l		norm			# normalize number
	mov.w		&0x3f81, %d1		# xprec exp = 0x3f81
	sub.w		%d0, %d1		# exp = 0x3f81 - shft amt.
	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b		&NORM, STAG(%a6)	# fix src type tag
	rts
19997
# convert sgl to ext SNAN
# %a0 : points to sgl SNAN
# Result in FP_SRC: exponent 0x7fff, the 23 fraction bits moved to
# bits 30..8 of the hi mantissa longword, lo mantissa cleared, and the
# sign copied from the single. The fraction is copied as is.
get_sgl_snan:
	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
	bfextu		(%a0){&9:&23}, %d0	# d0 = 23-bit sgl fraction
	lsl.l		&0x8, %d0		# extract and insert hi(man)
	mov.l		%d0, FP_SRC_HI(%a6)
	clr.l		FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# see if sign of SNAN is set
	beq.b		no_sgl_snan_sgn
	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
no_sgl_snan_sgn:
	rts
20012
20013#########################################
20014# load a DBL into %fp0:			#
20015# 	-number can't fault		#
20016#	(1) calc ea			#
20017#	(2) read 8 bytes into L_SCR(1,2)#
20018#	(3) fmov.d into %fp0		#
20019#########################################
20020load_dbl:
20021	movq.l		&0x8, %d0		# pass: 8 (bytes)
20022	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
20023
20024	cmpi.b		SPCOND_FLG(%a6),&immed_flg
20025	beq.b		load_dbl_immed
20026
20027	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
20028	movq.l		&0x8, %d0		# pass: # bytes to read
20029	bsr.l		_dmem_read		# fetch src operand from memory
20030
20031	tst.l		%d1			# did dfetch fail?
20032	bne.l		facc_in_d		# yes
20033
20034load_dbl_cont:
20035	lea		L_SCR1(%a6), %a0	# pass: ptr to input dbl
20036	bsr.l		set_tag_d		# determine src type tag
20037	mov.b		%d0, STAG(%a6)		# set src optype tag
20038
20039	cmpi.b		%d0, &DENORM		# is it a dbl DENORM?
20040	beq.w		get_dbl_denorm		# yes
20041
20042	cmpi.b		%d0, &SNAN		# is it a dbl SNAN?
20043	beq.w		get_dbl_snan		# yes
20044
20045	fmov.d		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
20046	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
20047	rts
20048
20049load_dbl_immed:
20050	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
20051	movq.l		&0x8, %d0		# pass: # bytes to read
20052	bsr.l		_imem_read		# fetch src operand from memory
20053
20054	tst.l		%d1			# did ifetch fail?
20055	bne.l		funimp_iacc		# yes
20056	bra.b		load_dbl_cont
20057
# must convert dbl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : loc. of dbl denorm
#
# The 52-bit fraction is repacked below the explicit integer bit: the
# first bfextu takes the 31 bits starting at bit offset 12 (20 fraction
# bits of the first longword plus the top 11 of the second), the second
# takes the remaining 21 bits and shifts them to the top of the lo
# mantissa. norm() then normalizes FP_SRC and returns the shift count
# in d0, from which the exponent 0x3c01 - shift is formed.
get_dbl_denorm:
	clr.w		FP_SRC_EX(%a6)		# {sgn,exp} = 0; sign is set below
	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l		%d0, FP_SRC_HI(%a6)
	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l		&0xb, %d1
	lsl.l		%d1, %d0		# left-justify the low 21 bits
	mov.l		%d0, FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# is sgn bit set?
	beq.b		dbl_dnrm_norm
	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

dbl_dnrm_norm:
	lea		FP_SRC(%a6), %a0
	bsr.l		norm			# normalize number
	mov.w		&0x3c01, %d1		# xprec exp = 0x3c01
	sub.w		%d0, %d1		# exp = 0x3c01 - shft amt.
	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b		&NORM, STAG(%a6)	# fix src type tag
	rts
20083
# convert dbl to ext SNAN
# %a0 : points to dbl SNAN
# Result in FP_SRC: exponent 0x7fff, the 52 fraction bits repacked
# into the hi/lo mantissa longwords below the integer bit (same bit
# field split as get_dbl_denorm), and the sign copied from the double.
# The fraction is copied as is.
get_dbl_snan:
	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN

	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l		%d0, FP_SRC_HI(%a6)
	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l		&0xb, %d1
	lsl.l		%d1, %d0		# left-justify the low 21 bits
	mov.l		%d0, FP_SRC_LO(%a6)

	btst		&0x7, (%a0)		# see if sign of SNAN is set
	beq.b		no_dbl_snan_sgn
	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
no_dbl_snan_sgn:
	rts
20101
20102#################################################
20103# load a Xprec into %fp0:			#
20104# 	-number can't fault			#
20105#	(1) calc ea				#
20106#	(2) read 12 bytes into L_SCR(1,2)	#
20107#	(3) fmov.x into %fp0			#
20108#################################################
20109load_ext:
20110	mov.l		&0xc, %d0		# pass: 12 (bytes)
20111	bsr.l		_dcalc_ea		# calc <ea>
20112
20113	lea		FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
20114	mov.l		&0xc, %d0		# pass: # of bytes to read
20115	bsr.l		_dmem_read		# fetch src operand from memory
20116
20117	tst.l		%d1			# did dfetch fail?
20118	bne.l		facc_in_x		# yes
20119
20120	lea		FP_SRC(%a6), %a0	# pass: ptr to src op
20121	bsr.l		set_tag_x		# determine src type tag
20122
20123	cmpi.b		%d0, &UNNORM		# is the src op an UNNORM?
20124	beq.b		load_ext_unnorm		# yes
20125
20126	mov.b		%d0, STAG(%a6)		# store the src optype tag
20127	rts
20128
20129load_ext_unnorm:
20130	bsr.l		unnorm_fix		# fix the src UNNORM
20131	mov.b		%d0, STAG(%a6)		# store the src optype tag
20132	rts
20133
20134#################################################
20135# load a packed into %fp0:			#
20136# 	-number can't fault			#
20137#	(1) calc ea				#
20138#	(2) read 12 bytes into L_SCR(1,2,3)	#
20139#	(3) fmov.x into %fp0			#
20140#################################################
20141load_packed:
20142	bsr.l		get_packed
20143
20144	lea		FP_SRC(%a6),%a0		# pass ptr to src op
20145	bsr.l		set_tag_x		# determine src type tag
20146	cmpi.b		%d0,&UNNORM		# is the src op an UNNORM ZERO?
20147	beq.b		load_packed_unnorm	# yes
20148
20149	mov.b		%d0,STAG(%a6)		# store the src optype tag
20150	rts
20151
20152load_packed_unnorm:
20153	bsr.l		unnorm_fix		# fix the UNNORM ZERO
20154	mov.b		%d0,STAG(%a6)		# store the src optype tag
20155	rts
20156
20157#########################################################################
20158# XDEF ****************************************************************	#
20159# 	fout(): move from fp register to memory or data register	#
20160#									#
20161# XREF ****************************************************************	#
20162#	_round() - needed to create EXOP for sgl/dbl precision		#
20163#	norm() - needed to create EXOP for extended precision		#
20164#	ovf_res() - create default overflow result for sgl/dbl precision#
20165#	unf_res() - create default underflow result for sgl/dbl prec.	#
20166#	dst_dbl() - create rounded dbl precision result.		#
20167#	dst_sgl() - create rounded sgl precision result.		#
20168#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
20169#	bindec() - convert FP binary number to packed number.		#
20170#	_mem_write() - write data to memory.				#
20171#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
20172#	_dmem_write_{byte,word,long}() - write data to memory.		#
20173#	store_dreg_{b,w,l}() - store data to data register file.	#
20174#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
20175#									#
20176# INPUT ***************************************************************	#
20177#	a0 = pointer to extended precision source operand		#
20178#	d0 = round prec,mode						#
20179# 									#
20180# OUTPUT **************************************************************	#
20181#	fp0 : intermediate underflow or overflow result if		#
20182#	      OVFL/UNFL occurred for a sgl or dbl operand		#
20183#									#
20184# ALGORITHM ***********************************************************	#
20185#	This routine is accessed by many handlers that need to do an	#
20186# opclass three move of an operand out to memory.			#
20187#	Decode an fmove out (opclass 3) instruction to determine if	#
20188# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
20189# register or memory. The algorithm uses a standard "fmove" to create	#
20190# the rounded result. Also, since exceptions are disabled, this also	#
20191# create the correct OPERR default result if appropriate.		#
20192#	For sgl or dbl precision, overflow or underflow can occur. If	#
20193# either occurs and is enabled, the EXOP.				#
20194#	For extended precision, the stacked <ea> must be fixed along	#
20195# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
20196# the source is a denorm and if underflow is enabled, an EXOP must be	#
20197# created.								#
20198# 	For packed, the k-factor must be fetched from the instruction	#
20199# word or a data register. The <ea> must be fixed as w/ extended 	#
20200# precision. Then, bindec() is called to create the appropriate 	#
20201# packed result.							#
20202#	If at any time an access error is flagged by one of the move-	#
20203# to-memory routines, then a special exit must be made so that the	#
20204# access error can be handled properly.					#
20205#									#
20206#########################################################################
20207
20208	global		fout
20209fout:
20210	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20211	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20212	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
20213
20214	swbeg		&0x8
20215tbl_fout:
20216	short		fout_long	-	tbl_fout
20217	short		fout_sgl	-	tbl_fout
20218	short		fout_ext	-	tbl_fout
20219	short		fout_pack	-	tbl_fout
20220	short		fout_word	-	tbl_fout
20221	short		fout_dbl	-	tbl_fout
20222	short		fout_byte	-	tbl_fout
20223	short		fout_pack	-	tbl_fout
20224
20225#################################################################
20226# fmove.b out ###################################################
20227#################################################################
20228
20229# Only "Unimplemented Data Type" exceptions enter here. The operand
20230# is either a DENORM or a NORM.
20231fout_byte:
20232	tst.b		STAG(%a6)		# is operand normalized?
20233	bne.b		fout_byte_denorm	# no
20234
20235	fmovm.x		SRC(%a0),&0x80		# load value
20236
20237fout_byte_norm:
20238	fmov.l		%d0,%fpcr		# insert rnd prec,mode
20239
20240	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
20241
20242	fmov.l		&0x0,%fpcr		# clear FPCR
20243	fmov.l		%fpsr,%d1		# fetch FPSR
20244	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20245
20246	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20247	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20248	beq.b		fout_byte_dn		# must save to integer regfile
20249
20250	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20251	bsr.l		_dmem_write_byte	# write byte
20252
20253	tst.l		%d1			# did dstore fail?
20254	bne.l		facc_out_b		# yes
20255
20256	rts
20257
20258fout_byte_dn:
20259	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20260	andi.w		&0x7,%d1
20261	bsr.l		store_dreg_b
20262	rts
20263
20264fout_byte_denorm:
20265	mov.l		SRC_EX(%a0),%d1
20266	andi.l		&0x80000000,%d1		# keep DENORM sign
20267	ori.l		&0x00800000,%d1		# make smallest sgl
20268	fmov.s		%d1,%fp0
20269	bra.b		fout_byte_norm
20270
20271#################################################################
20272# fmove.w out ###################################################
20273#################################################################
20274
20275# Only "Unimplemented Data Type" exceptions enter here. The operand
20276# is either a DENORM or a NORM.
20277fout_word:
20278	tst.b		STAG(%a6)		# is operand normalized?
20279	bne.b		fout_word_denorm	# no
20280
20281	fmovm.x		SRC(%a0),&0x80		# load value
20282
20283fout_word_norm:
20284	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20285
20286	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
20287
20288	fmov.l		&0x0,%fpcr		# clear FPCR
20289	fmov.l		%fpsr,%d1		# fetch FPSR
20290	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20291
20292	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20293	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20294	beq.b		fout_word_dn		# must save to integer regfile
20295
20296	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20297	bsr.l		_dmem_write_word	# write word
20298
20299	tst.l		%d1			# did dstore fail?
20300	bne.l		facc_out_w		# yes
20301
20302	rts
20303
20304fout_word_dn:
20305	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20306	andi.w		&0x7,%d1
20307	bsr.l		store_dreg_w
20308	rts
20309
20310fout_word_denorm:
20311	mov.l		SRC_EX(%a0),%d1
20312	andi.l		&0x80000000,%d1		# keep DENORM sign
20313	ori.l		&0x00800000,%d1		# make smallest sgl
20314	fmov.s		%d1,%fp0
20315	bra.b		fout_word_norm
20316
20317#################################################################
20318# fmove.l out ###################################################
20319#################################################################
20320
20321# Only "Unimplemented Data Type" exceptions enter here. The operand
20322# is either a DENORM or a NORM.
20323fout_long:
20324	tst.b		STAG(%a6)		# is operand normalized?
20325	bne.b		fout_long_denorm	# no
20326
20327	fmovm.x		SRC(%a0),&0x80		# load value
20328
20329fout_long_norm:
20330	fmov.l		%d0,%fpcr		# insert rnd prec:mode
20331
20332	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
20333
20334	fmov.l		&0x0,%fpcr		# clear FPCR
20335	fmov.l		%fpsr,%d1		# fetch FPSR
20336	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
20337
20338fout_long_write:
20339	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20340	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20341	beq.b		fout_long_dn		# must save to integer regfile
20342
20343	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20344	bsr.l		_dmem_write_long	# write long
20345
20346	tst.l		%d1			# did dstore fail?
20347	bne.l		facc_out_l		# yes
20348
20349	rts
20350
20351fout_long_dn:
20352	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20353	andi.w		&0x7,%d1
20354	bsr.l		store_dreg_l
20355	rts
20356
20357fout_long_denorm:
20358	mov.l		SRC_EX(%a0),%d1
20359	andi.l		&0x80000000,%d1		# keep DENORM sign
20360	ori.l		&0x00800000,%d1		# make smallest sgl
20361	fmov.s		%d1,%fp0
20362	bra.b		fout_long_norm
20363
20364#################################################################
20365# fmove.x out ###################################################
20366#################################################################
20367
20368# Only "Unimplemented Data Type" exceptions enter here. The operand
20369# is either a DENORM or a NORM.
20370# The DENORM causes an Underflow exception.
20371fout_ext:
20372
20373# we copy the extended precision result to FP_SCR0 so that the reserved
20374# 16-bit field gets zeroed. we do this since we promise not to disturb
20375# what's at SRC(a0).
20376	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20377	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
20378	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20379	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20380
20381	fmovm.x		SRC(%a0),&0x80		# return result
20382
20383	bsr.l		_calc_ea_fout		# fix stacked <ea>
20384
20385	mov.l		%a0,%a1			# pass: dst addr
20386	lea		FP_SCR0(%a6),%a0	# pass: src addr
20387	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
20388
20389# we must not yet write the extended precision data to the stack
20390# in the pre-decrement case from supervisor mode or else we'll corrupt
20391# the stack frame. so, leave it in FP_SRC for now and deal with it later...
20392	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
20393	beq.b		fout_ext_a7
20394
20395	bsr.l		_dmem_write		# write ext prec number to memory
20396
20397	tst.l		%d1			# did dstore fail?
20398	bne.w		fout_ext_err		# yes
20399
20400	tst.b		STAG(%a6)		# is operand normalized?
20401	bne.b		fout_ext_denorm		# no
20402	rts
20403
20404# the number is a DENORM. must set the underflow exception bit
20405fout_ext_denorm:
20406	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20407
20408	mov.b		FPCR_ENABLE(%a6),%d0
20409	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
20410	bne.b		fout_ext_exc		# yes
20411	rts
20412
20413# we don't want to do the write if the exception occurred in supervisor mode
20414# so _mem_write2() handles this for us.
20415fout_ext_a7:
20416	bsr.l		_mem_write2		# write ext prec number to memory
20417
20418	tst.l		%d1			# did dstore fail?
20419	bne.w		fout_ext_err		# yes
20420
20421	tst.b		STAG(%a6)		# is operand normalized?
20422	bne.b		fout_ext_denorm		# no
20423	rts
20424
20425fout_ext_exc:
20426	lea		FP_SCR0(%a6),%a0
20427	bsr.l		norm			# normalize the mantissa
20428	neg.w		%d0			# new exp = -(shft amt)
20429	andi.w		&0x7fff,%d0
20430	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
20431	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
20432	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
20433	rts
20434
20435fout_ext_err:
20436	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
20437	bra.l		facc_out_x
20438
20439#########################################################################
20440# fmove.s out ###########################################################
20441#########################################################################
20442fout_sgl:
20443	andi.b		&0x30,%d0		# clear rnd prec
20444	ori.b		&s_mode*0x10,%d0	# insert sgl prec
20445	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
20446
20447#
20448# operand is a normalized number. first, we check to see if the move out
20449# would cause either an underflow or overflow. these cases are handled
20450# separately. otherwise, set the FPCR to the proper rounding mode and
20451# execute the move.
20452#
20453	mov.w		SRC_EX(%a0),%d0		# extract exponent
20454	andi.w		&0x7fff,%d0		# strip sign
20455
20456	cmpi.w		%d0,&SGL_HI		# will operand overflow?
20457	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
20458	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
20459	cmpi.w		%d0,&SGL_LO		# will operand underflow?
20460	blt.w		fout_sgl_unfl		# yes; go handle underflow
20461
20462#
20463# NORMs(in range) can be stored out by a simple "fmov.s"
20464# Unnormalized inputs can come through this point.
20465#
20466fout_sgl_exg:
20467	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20468
20469	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20470	fmov.l		&0x0,%fpsr		# clear FPSR
20471
20472	fmov.s		%fp0,%d0		# store does convert and round
20473
20474	fmov.l		&0x0,%fpcr		# clear FPCR
20475	fmov.l		%fpsr,%d1		# save FPSR
20476
20477	or.w		%d1,2+USER_FPSR(%a6) 	# set possible inex2/ainex
20478
20479fout_sgl_exg_write:
20480	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20481	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20482	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
20483
20484	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20485	bsr.l		_dmem_write_long	# write long
20486
20487	tst.l		%d1			# did dstore fail?
20488	bne.l		facc_out_l		# yes
20489
20490	rts
20491
20492fout_sgl_exg_write_dn:
20493	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20494	andi.w		&0x7,%d1
20495	bsr.l		store_dreg_l
20496	rts
20497
20498#
20499# here, we know that the operand would UNFL if moved out to single prec,
20500# so, denorm and round and then use generic store single routine to
20501# write the value to memory.
20502#
20503fout_sgl_unfl:
20504	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20505
20506	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20507	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20508	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20509	mov.l		%a0,-(%sp)
20510
20511	clr.l		%d0			# pass: S.F. = 0
20512
20513	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20514	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
20515
20516	lea		FP_SCR0(%a6),%a0
20517	bsr.l		norm			# normalize the DENORM
20518
20519fout_sgl_unfl_cont:
20520	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20521	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20522	bsr.l		unf_res			# calc default underflow result
20523
20524	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20525	bsr.l		dst_sgl			# convert to single prec
20526
20527	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20528	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20529	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
20530
20531	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20532	bsr.l		_dmem_write_long	# write long
20533
20534	tst.l		%d1			# did dstore fail?
20535	bne.l		facc_out_l		# yes
20536
20537	bra.b		fout_sgl_unfl_chkexc
20538
20539fout_sgl_unfl_dn:
20540	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20541	andi.w		&0x7,%d1
20542	bsr.l		store_dreg_l
20543
20544fout_sgl_unfl_chkexc:
20545	mov.b		FPCR_ENABLE(%a6),%d1
20546	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20547	bne.w		fout_sd_exc_unfl	# yes
20548	addq.l		&0x4,%sp
20549	rts
20550
20551#
20552# it's definitely an overflow so call ovf_res to get the correct answer
20553#
20554fout_sgl_ovfl:
20555	tst.b		3+SRC_HI(%a0)		# is result inexact?
20556	bne.b		fout_sgl_ovfl_inex2
20557	tst.l		SRC_LO(%a0)		# is result inexact?
20558	bne.b		fout_sgl_ovfl_inex2
20559	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20560	bra.b		fout_sgl_ovfl_cont
20561fout_sgl_ovfl_inex2:
20562	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20563
20564fout_sgl_ovfl_cont:
20565	mov.l		%a0,-(%sp)
20566
20567# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20568# overflow result. DON'T save the returned ccodes from ovf_res() since
20569# fmove out doesn't alter them.
20570	tst.b		SRC_EX(%a0)		# is operand negative?
20571	smi		%d1			# set if so
20572	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
20573	bsr.l		ovf_res			# calc OVFL result
20574	fmovm.x		(%a0),&0x80		# load default overflow result
20575	fmov.s		%fp0,%d0		# store to single
20576
20577	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
20578	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
20579	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
20580
20581	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
20582	bsr.l		_dmem_write_long	# write long
20583
20584	tst.l		%d1			# did dstore fail?
20585	bne.l		facc_out_l		# yes
20586
20587	bra.b		fout_sgl_ovfl_chkexc
20588
20589fout_sgl_ovfl_dn:
20590	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
20591	andi.w		&0x7,%d1
20592	bsr.l		store_dreg_l
20593
20594fout_sgl_ovfl_chkexc:
20595	mov.b		FPCR_ENABLE(%a6),%d1
20596	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20597	bne.w		fout_sd_exc_ovfl	# yes
20598	addq.l		&0x4,%sp
20599	rts
20600
20601#
20602# move out MAY overflow:
20603# (1) force the exp to 0x3fff
20604# (2) do a move w/ appropriate rnd mode
20605# (3) if exp still equals zero, then insert original exponent
20606#	for the correct result.
20607#     if exp now equals one, then it overflowed so call ovf_res.
20608#
20609fout_sgl_may_ovfl:
20610	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20611	andi.w		&0x8000,%d1		# keep it,clear exp
20612	ori.w		&0x3fff,%d1		# insert exp = 0
20613	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20614	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20615	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20616
20617	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20618
20619	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20620	fmov.l		&0x0,%fpcr		# clear FPCR
20621
20622	fabs.x		%fp0			# need absolute value
20623	fcmp.b		%fp0,&0x2		# did exponent increase?
20624	fblt.w		fout_sgl_exg		# no; go finish NORM
20625	bra.w		fout_sgl_ovfl		# yes; go handle overflow
20626
20627################
20628
20629fout_sd_exc_unfl:
20630	mov.l		(%sp)+,%a0
20631
20632	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20633	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20634	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20635
20636	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
20637	bne.b		fout_sd_exc_cont	# no
20638
20639	lea		FP_SCR0(%a6),%a0
20640	bsr.l		norm
20641	neg.l		%d0
20642	andi.w		&0x7fff,%d0
20643	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
20644	bra.b		fout_sd_exc_cont
20645
20646fout_sd_exc:
20647fout_sd_exc_ovfl:
20648	mov.l		(%sp)+,%a0		# restore a0
20649
20650	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20651	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20652	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20653
20654fout_sd_exc_cont:
20655	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
20656	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
20657	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
20658
20659	mov.b		3+L_SCR3(%a6),%d1
20660	lsr.b		&0x4,%d1
20661	andi.w		&0x0c,%d1
20662	swap		%d1
20663	mov.b		3+L_SCR3(%a6),%d1
20664	lsr.b		&0x4,%d1
20665	andi.w		&0x03,%d1
20666	clr.l		%d0			# pass: zero g,r,s
20667	bsr.l		_round			# round the DENORM
20668
20669	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
20670	beq.b		fout_sd_exc_done	# no
20671	bset		&0x7,FP_SCR0_EX(%a6)	# yes
20672
20673fout_sd_exc_done:
20674	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
20675	rts
20676
20677#################################################################
20678# fmove.d out ###################################################
20679#################################################################
20680fout_dbl:
20681	andi.b		&0x30,%d0		# clear rnd prec
20682	ori.b		&d_mode*0x10,%d0	# insert dbl prec
20683	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
20684
20685#
20686# operand is a normalized number. first, we check to see if the move out
20687# would cause either an underflow or overflow. these cases are handled
20688# separately. otherwise, set the FPCR to the proper rounding mode and
20689# execute the move.
20690#
20691	mov.w		SRC_EX(%a0),%d0		# extract exponent
20692	andi.w		&0x7fff,%d0		# strip sign
20693
20694	cmpi.w		%d0,&DBL_HI		# will operand overflow?
20695	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
20696	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
20697	cmpi.w		%d0,&DBL_LO		# will operand underflow?
20698	blt.w		fout_dbl_unfl		# yes; go handle underflow
20699
20700#
20701# NORMs(in range) can be stored out by a simple "fmov.d"
20702# Unnormalized inputs can come through this point.
20703#
20704fout_dbl_exg:
20705	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
20706
20707	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20708	fmov.l		&0x0,%fpsr		# clear FPSR
20709
20710	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
20711
20712	fmov.l		&0x0,%fpcr		# clear FPCR
20713	fmov.l		%fpsr,%d0		# save FPSR
20714
20715	or.w		%d0,2+USER_FPSR(%a6) 	# set possible inex2/ainex
20716
20717	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20718	lea		L_SCR1(%a6),%a0		# pass: src addr
20719	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20720	bsr.l		_dmem_write		# store dbl fop to memory
20721
20722	tst.l		%d1			# did dstore fail?
20723	bne.l		facc_out_d		# yes
20724
20725	rts					# no; so we're finished
20726
20727#
20728# here, we know that the operand would UNFL if moved out to double prec,
20729# so, denorm and round and then use generic store double routine to
20730# write the value to memory.
20731#
20732fout_dbl_unfl:
20733	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20734
20735	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
20736	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
20737	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
20738	mov.l		%a0,-(%sp)
20739
20740	clr.l		%d0			# pass: S.F. = 0
20741
20742	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
20743	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
20744
20745	lea		FP_SCR0(%a6),%a0
20746	bsr.l		norm			# normalize the DENORM
20747
20748fout_dbl_unfl_cont:
20749	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
20750	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
20751	bsr.l		unf_res			# calc default underflow result
20752
20753	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
20754	bsr.l		dst_dbl			# convert to single prec
20755	mov.l		%d0,L_SCR1(%a6)
20756	mov.l		%d1,L_SCR2(%a6)
20757
20758	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20759	lea		L_SCR1(%a6),%a0		# pass: src addr
20760	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20761	bsr.l		_dmem_write		# store dbl fop to memory
20762
20763	tst.l		%d1			# did dstore fail?
20764	bne.l		facc_out_d		# yes
20765
20766	mov.b		FPCR_ENABLE(%a6),%d1
20767	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20768	bne.w		fout_sd_exc_unfl	# yes
20769	addq.l		&0x4,%sp
20770	rts
20771
20772#
20773# it's definitely an overflow so call ovf_res to get the correct answer
20774#
20775fout_dbl_ovfl:
20776	mov.w		2+SRC_LO(%a0),%d0
20777	andi.w		&0x7ff,%d0
20778	bne.b		fout_dbl_ovfl_inex2
20779
20780	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20781	bra.b		fout_dbl_ovfl_cont
20782fout_dbl_ovfl_inex2:
20783	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20784
20785fout_dbl_ovfl_cont:
20786	mov.l		%a0,-(%sp)
20787
20788# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20789# overflow result. DON'T save the returned ccodes from ovf_res() since
20790# fmove out doesn't alter them.
20791	tst.b		SRC_EX(%a0)		# is operand negative?
20792	smi		%d1			# set if so
20793	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
20794	bsr.l		ovf_res			# calc OVFL result
20795	fmovm.x		(%a0),&0x80		# load default overflow result
20796	fmov.d		%fp0,L_SCR1(%a6)	# store to double
20797
20798	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
20799	lea		L_SCR1(%a6),%a0		# pass: src addr
20800	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
20801	bsr.l		_dmem_write		# store dbl fop to memory
20802
20803	tst.l		%d1			# did dstore fail?
20804	bne.l		facc_out_d		# yes
20805
20806	mov.b		FPCR_ENABLE(%a6),%d1
20807	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
20808	bne.w		fout_sd_exc_ovfl	# yes
20809	addq.l		&0x4,%sp
20810	rts
20811
20812#
20813# move out MAY overflow:
20814# (1) force the exp to 0x3fff
20815# (2) do a move w/ appropriate rnd mode
20816# (3) if exp still equals zero, then insert original exponent
20817#	for the correct result.
20818#     if exp now equals one, then it overflowed so call ovf_res.
20819#
20820fout_dbl_may_ovfl:
20821	mov.w		SRC_EX(%a0),%d1		# fetch current sign
20822	andi.w		&0x8000,%d1		# keep it,clear exp
20823	ori.w		&0x3fff,%d1		# insert exp = 0
20824	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
20825	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20826	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20827
20828	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
20829
20830	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
20831	fmov.l		&0x0,%fpcr		# clear FPCR
20832
20833	fabs.x		%fp0			# need absolute value
20834	fcmp.b		%fp0,&0x2		# did exponent increase?
20835	fblt.w		fout_dbl_exg		# no; go finish NORM
20836	bra.w		fout_dbl_ovfl		# yes; go handle overflow
20837
20838#########################################################################
20839# XDEF ****************************************************************	#
20840# 	dst_dbl(): create double precision value from extended prec.	#
20841#									#
20842# XREF ****************************************************************	#
20843#	None								#
20844#									#
20845# INPUT ***************************************************************	#
20846#	a0 = pointer to source operand in extended precision		#
20847# 									#
20848# OUTPUT **************************************************************	#
20849#	d0 = hi(double precision result)				#
20850#	d1 = lo(double precision result)				#
20851#									#
20852# ALGORITHM ***********************************************************	#
20853#									#
20854#  Changes extended precision to double precision.			#
20855#  Note: no attempt is made to round the extended value to double.	#
20856#	dbl_sign = ext_sign						#
20857#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
20858#	get rid of ext integer bit					#
20859#	dbl_mant = ext_mant{62:12}					#
20860#									#
20861#	    	---------------   ---------------    ---------------	#
20862#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20863#	    	---------------   ---------------    ---------------	#
20864#	   	 95	    64    63 62	      32      31     11	  0	#
20865#				     |			     |		#
20866#				     |			     |		#
20867#				     |			     |		#
20868#		 	             v   		     v		#
20869#	    		      ---------------   ---------------		#
20870#  double   ->  	      |s|exp| mant  |   |  mant       |		#
20871#	    		      ---------------   ---------------		#
20872#	   	 	      63     51   32   31	       0	#
20873#									#
20874#########################################################################
20875
20876dst_dbl:
20877	clr.l		%d0			# clear d0
20878	mov.w		FTEMP_EX(%a0),%d0	# get exponent
20879	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
20880	addi.w		&DBL_BIAS,%d0		# add double precision bias
20881	tst.b		FTEMP_HI(%a0)		# is number a denorm?
20882	bmi.b		dst_get_dupper		# no
20883	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
20884dst_get_dupper:
20885	swap		%d0			# d0 now in upper word
20886	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
20887	tst.b		FTEMP_EX(%a0)		# test sign
20888	bpl.b		dst_get_dman		# if postive, go process mantissa
20889	bset		&0x1f,%d0		# if negative, set sign
20890dst_get_dman:
20891	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20892	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
20893	or.l		%d1,%d0			# put these bits in ms word of double
20894	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
20895	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20896	mov.l		&21,%d0			# load shift count
20897	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
20898	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
20899	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
20900	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
20901	mov.l		L_SCR2(%a6),%d1
20902	or.l		%d0,%d1			# put them in double result
20903	mov.l		L_SCR1(%a6),%d0
20904	rts
20905
20906#########################################################################
20907# XDEF ****************************************************************	#
20908# 	dst_sgl(): create single precision value from extended prec	#
20909#									#
20910# XREF ****************************************************************	#
20911#									#
20912# INPUT ***************************************************************	#
20913#	a0 = pointer to source operand in extended precision		#
20914# 									#
20915# OUTPUT **************************************************************	#
20916#	d0 = single precision result					#
20917#									#
20918# ALGORITHM ***********************************************************	#
20919#									#
20920# Changes extended precision to single precision.			#
20921#	sgl_sign = ext_sign						#
20922#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
20923#	get rid of ext integer bit					#
20924#	sgl_mant = ext_mant{62:12}					#
20925#									#
20926#	    	---------------   ---------------    ---------------	#
20927#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
20928#	    	---------------   ---------------    ---------------	#
20929#	   	 95	    64    63 62	   40 32      31     12	  0	#
20930#				     |	   |				#
20931#				     |	   |				#
20932#				     |	   |				#
20933#		 	             v     v				#
20934#	    		      ---------------				#
20935#  single   ->  	      |s|exp| mant  |				#
20936#	    		      ---------------				#
20937#	   	 	      31     22     0				#
20938#									#
20939#########################################################################
20940
20941dst_sgl:
20942	clr.l		%d0
20943	mov.w		FTEMP_EX(%a0),%d0	# get exponent
20944	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
20945	addi.w		&SGL_BIAS,%d0		# add single precision bias
20946	tst.b		FTEMP_HI(%a0)		# is number a denorm?
20947	bmi.b		dst_get_supper		# no
20948	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
20949dst_get_supper:
20950	swap		%d0			# put exp in upper word of d0
20951	lsl.l		&0x7,%d0		# shift it into single exp bits
20952	tst.b		FTEMP_EX(%a0)		# test sign
20953	bpl.b		dst_get_sman		# if positive, continue
20954	bset		&0x1f,%d0		# if negative, put in sign first
20955dst_get_sman:
20956	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
20957	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
20958	lsr.l		&0x8,%d1		# and put them flush right
20959	or.l		%d1,%d0			# put these bits in ms word of single
20960	rts
20961
20962##############################################################################
# fout_pack: store the operand in FP_SRC(a6) to memory as a 12-byte packed
# decimal value.
#   - NORM/DENORM inputs are converted by bindec; the result is taken from
#     FP_SCR0(a6).
#   - all other input types are written straight from FP_SRC(a6), after
#     clearing the word at 2+FP_SRC_EX (and, for SNAN, flagging SNAN/AIOP
#     and setting bit 6 of the first mantissa byte).
# Stack discipline: the destination <ea> is pushed first and popped into a1
# at fout_pack_write; the k-factor is pushed/popped around the bindec call.
# A failed write (d1 != 0) exits through fout_ext_err.
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr until fout_pack_write

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

# dynamic k-factor: register number is bits 6:4 of the 2nd cmdreg byte
fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1		# move reg field to the low bits
	andi.w		&0x7,%d1		# d1.w = Dn index (0-7)

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
# static k-factor: taken directly from the 2nd cmdreg byte
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor
	mov.l	%d0,-(%sp)			# keep a copy across bindec

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# NOTE(review): d0 still holds the k-factor here; presumably bindec takes
# it as an argument - confirm against bindec's header.
# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l	(%sp)+,%d0			# d0 = k-factor again

# is the packed result zero? check the low byte of the first longword
# (only its low nibble survived the mask above) and both mantissa longwords.
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)	# keep top nibble, clear low 12 bits

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

# common tail: a0 = source (FP_SCR0 or FP_SRC), dst addr is on the stack
fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> -(a7)?
	beq.b		fout_pack_a7		# yes; use _mem_write2

	bsr.l		_dmem_write		# write 12-byte packed value to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write 12-byte packed value to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# input was not NORM: d0 = source tag
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0		# no; source is written without conversion
	clr.w		2+FP_SRC_EX(%a6)	# zero the word following the sign/exp word
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write
21059
21060#########################################################################
21061# XDEF ****************************************************************	#
21062#	fetch_dreg(): fetch register according to index in d1		#
21063#									#
21064# XREF ****************************************************************	#
21065#	None								#
21066#									#
21067# INPUT ***************************************************************	#
21068#	d1 = index of register to fetch from				#
21069# 									#
21070# OUTPUT **************************************************************	#
21071#	d0 = value of register fetched					#
21072#									#
21073# ALGORITHM ***********************************************************	#
21074#	According to the index value in d1 which can range from zero 	#
21075# to fifteen, load the corresponding register file value (where 	#
21076# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
21077# stack. The rest should still be in their original places.		#
21078#									#
21079#########################################################################
21080
21081# this routine leaves d1 intact for subsequent store_dreg calls.
21082	global		fetch_dreg
21083fetch_dreg:
21084	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
21085	jmp		(tbl_fdreg.b,%pc,%d0.w*1)
21086
21087tbl_fdreg:
21088	short		fdreg0 - tbl_fdreg
21089	short		fdreg1 - tbl_fdreg
21090	short		fdreg2 - tbl_fdreg
21091	short		fdreg3 - tbl_fdreg
21092	short		fdreg4 - tbl_fdreg
21093	short		fdreg5 - tbl_fdreg
21094	short		fdreg6 - tbl_fdreg
21095	short		fdreg7 - tbl_fdreg
21096	short		fdreg8 - tbl_fdreg
21097	short		fdreg9 - tbl_fdreg
21098	short		fdrega - tbl_fdreg
21099	short		fdregb - tbl_fdreg
21100	short		fdregc - tbl_fdreg
21101	short		fdregd - tbl_fdreg
21102	short		fdrege - tbl_fdreg
21103	short		fdregf - tbl_fdreg
21104
21105fdreg0:
21106	mov.l		EXC_DREGS+0x0(%a6),%d0
21107	rts
21108fdreg1:
21109	mov.l		EXC_DREGS+0x4(%a6),%d0
21110	rts
21111fdreg2:
21112	mov.l		%d2,%d0
21113	rts
21114fdreg3:
21115	mov.l		%d3,%d0
21116	rts
21117fdreg4:
21118	mov.l		%d4,%d0
21119	rts
21120fdreg5:
21121	mov.l		%d5,%d0
21122	rts
21123fdreg6:
21124	mov.l		%d6,%d0
21125	rts
21126fdreg7:
21127	mov.l		%d7,%d0
21128	rts
21129fdreg8:
21130	mov.l		EXC_DREGS+0x8(%a6),%d0
21131	rts
21132fdreg9:
21133	mov.l		EXC_DREGS+0xc(%a6),%d0
21134	rts
21135fdrega:
21136	mov.l		%a2,%d0
21137	rts
21138fdregb:
21139	mov.l		%a3,%d0
21140	rts
21141fdregc:
21142	mov.l		%a4,%d0
21143	rts
21144fdregd:
21145	mov.l		%a5,%d0
21146	rts
21147fdrege:
21148	mov.l		(%a6),%d0
21149	rts
21150fdregf:
21151	mov.l		EXC_A7(%a6),%d0
21152	rts
21153
21154#########################################################################
21155# XDEF ****************************************************************	#
21156#	store_dreg_l(): store longword to data register specified by d1	#
21157#									#
21158# XREF ****************************************************************	#
21159#	None								#
21160#									#
21161# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of register to store to				#
21164# 									#
21165# OUTPUT **************************************************************	#
21166#	(data register is updated)					#
21167#									#
21168# ALGORITHM ***********************************************************	#
21169#	According to the index value in d1, store the longword value	#
21170# in d0 to the corresponding data register. D0/D1 are on the stack	#
21171# while the rest are in their initial places.				#
21172#									#
21173#########################################################################
21174
21175	global		store_dreg_l
21176store_dreg_l:
21177	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
21178	jmp		(tbl_sdregl.b,%pc,%d1.w*1)
21179
21180tbl_sdregl:
21181	short		sdregl0 - tbl_sdregl
21182	short		sdregl1 - tbl_sdregl
21183	short		sdregl2 - tbl_sdregl
21184	short		sdregl3 - tbl_sdregl
21185	short		sdregl4 - tbl_sdregl
21186	short		sdregl5 - tbl_sdregl
21187	short		sdregl6 - tbl_sdregl
21188	short		sdregl7 - tbl_sdregl
21189
21190sdregl0:
21191	mov.l		%d0,EXC_DREGS+0x0(%a6)
21192	rts
21193sdregl1:
21194	mov.l		%d0,EXC_DREGS+0x4(%a6)
21195	rts
21196sdregl2:
21197	mov.l		%d0,%d2
21198	rts
21199sdregl3:
21200	mov.l		%d0,%d3
21201	rts
21202sdregl4:
21203	mov.l		%d0,%d4
21204	rts
21205sdregl5:
21206	mov.l		%d0,%d5
21207	rts
21208sdregl6:
21209	mov.l		%d0,%d6
21210	rts
21211sdregl7:
21212	mov.l		%d0,%d7
21213	rts
21214
21215#########################################################################
21216# XDEF ****************************************************************	#
21217#	store_dreg_w(): store word to data register specified by d1	#
21218#									#
21219# XREF ****************************************************************	#
21220#	None								#
21221#									#
21222# INPUT ***************************************************************	#
21223#	d0 = word value to store					#
#	d1 = index of register to store to				#
21225# 									#
21226# OUTPUT **************************************************************	#
21227#	(data register is updated)					#
21228#									#
21229# ALGORITHM ***********************************************************	#
21230#	According to the index value in d1, store the word value	#
21231# in d0 to the corresponding data register. D0/D1 are on the stack	#
21232# while the rest are in their initial places.				#
21233#									#
21234#########################################################################
21235
21236	global		store_dreg_w
21237store_dreg_w:
21238	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
21239	jmp		(tbl_sdregw.b,%pc,%d1.w*1)
21240
21241tbl_sdregw:
21242	short		sdregw0 - tbl_sdregw
21243	short		sdregw1 - tbl_sdregw
21244	short		sdregw2 - tbl_sdregw
21245	short		sdregw3 - tbl_sdregw
21246	short		sdregw4 - tbl_sdregw
21247	short		sdregw5 - tbl_sdregw
21248	short		sdregw6 - tbl_sdregw
21249	short		sdregw7 - tbl_sdregw
21250
21251sdregw0:
21252	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
21253	rts
21254sdregw1:
21255	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
21256	rts
21257sdregw2:
21258	mov.w		%d0,%d2
21259	rts
21260sdregw3:
21261	mov.w		%d0,%d3
21262	rts
21263sdregw4:
21264	mov.w		%d0,%d4
21265	rts
21266sdregw5:
21267	mov.w		%d0,%d5
21268	rts
21269sdregw6:
21270	mov.w		%d0,%d6
21271	rts
21272sdregw7:
21273	mov.w		%d0,%d7
21274	rts
21275
21276#########################################################################
21277# XDEF ****************************************************************	#
21278#	store_dreg_b(): store byte to data register specified by d1	#
21279#									#
21280# XREF ****************************************************************	#
21281#	None								#
21282#									#
21283# INPUT ***************************************************************	#
21284#	d0 = byte value to store					#
#	d1 = index of register to store to				#
21286# 									#
21287# OUTPUT **************************************************************	#
21288#	(data register is updated)					#
21289#									#
21290# ALGORITHM ***********************************************************	#
21291#	According to the index value in d1, store the byte value	#
21292# in d0 to the corresponding data register. D0/D1 are on the stack	#
21293# while the rest are in their initial places.				#
21294#									#
21295#########################################################################
21296
21297	global		store_dreg_b
21298store_dreg_b:
21299	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
21300	jmp		(tbl_sdregb.b,%pc,%d1.w*1)
21301
21302tbl_sdregb:
21303	short		sdregb0 - tbl_sdregb
21304	short		sdregb1 - tbl_sdregb
21305	short		sdregb2 - tbl_sdregb
21306	short		sdregb3 - tbl_sdregb
21307	short		sdregb4 - tbl_sdregb
21308	short		sdregb5 - tbl_sdregb
21309	short		sdregb6 - tbl_sdregb
21310	short		sdregb7 - tbl_sdregb
21311
21312sdregb0:
21313	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
21314	rts
21315sdregb1:
21316	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
21317	rts
21318sdregb2:
21319	mov.b		%d0,%d2
21320	rts
21321sdregb3:
21322	mov.b		%d0,%d3
21323	rts
21324sdregb4:
21325	mov.b		%d0,%d4
21326	rts
21327sdregb5:
21328	mov.b		%d0,%d5
21329	rts
21330sdregb6:
21331	mov.b		%d0,%d6
21332	rts
21333sdregb7:
21334	mov.b		%d0,%d7
21335	rts
21336
21337#########################################################################
21338# XDEF ****************************************************************	#
21339#	inc_areg(): increment an address register by the value in d0	#
21340#									#
21341# XREF ****************************************************************	#
21342#	None								#
21343#									#
21344# INPUT ***************************************************************	#
21345#	d0 = amount to increment by					#
21346#	d1 = index of address register to increment			#
21347# 									#
21348# OUTPUT **************************************************************	#
21349#	(address register is updated)					#
21350#									#
21351# ALGORITHM ***********************************************************	#
21352# 	Typically used for an instruction w/ a post-increment <ea>, 	#
21353# this routine adds the increment value in d0 to the address register	#
21354# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
21355# in their original places.						#
21356# 	For a7, if the increment amount is one, then we have to 	#
# increment by two. For any a7 update, set the mia7_flg so that if	#
21358# an access error exception occurs later in emulation, this address	#
21359# register update can be undone.					#
21360#									#
21361#########################################################################
21362
21363	global		inc_areg
21364inc_areg:
21365	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
21366	jmp		(tbl_iareg.b,%pc,%d1.w*1)
21367
21368tbl_iareg:
21369	short		iareg0 - tbl_iareg
21370	short		iareg1 - tbl_iareg
21371	short		iareg2 - tbl_iareg
21372	short		iareg3 - tbl_iareg
21373	short		iareg4 - tbl_iareg
21374	short		iareg5 - tbl_iareg
21375	short		iareg6 - tbl_iareg
21376	short		iareg7 - tbl_iareg
21377
21378iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
21379	rts
21380iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
21381	rts
21382iareg2:	add.l		%d0,%a2
21383	rts
21384iareg3:	add.l		%d0,%a3
21385	rts
21386iareg4:	add.l		%d0,%a4
21387	rts
21388iareg5:	add.l		%d0,%a5
21389	rts
21390iareg6:	add.l		%d0,(%a6)
21391	rts
21392iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
21393	cmpi.b		%d0,&0x1
21394	beq.b		iareg7b
21395	add.l		%d0,EXC_A7(%a6)
21396	rts
21397iareg7b:
21398	addq.l		&0x2,EXC_A7(%a6)
21399	rts
21400
21401#########################################################################
21402# XDEF ****************************************************************	#
21403#	dec_areg(): decrement an address register by the value in d0	#
21404#									#
21405# XREF ****************************************************************	#
21406#	None								#
21407#									#
21408# INPUT ***************************************************************	#
21409#	d0 = amount to decrement by					#
21410#	d1 = index of address register to decrement			#
21411# 									#
21412# OUTPUT **************************************************************	#
21413#	(address register is updated)					#
21414#									#
21415# ALGORITHM ***********************************************************	#
21416# 	Typically used for an instruction w/ a pre-decrement <ea>, 	#
# this routine subtracts the decrement value in d0 from the address	#
# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
# reside in their original places.					#
# 	For a7, if the decrement amount is one, then we have to 	#
# decrement by two. For any a7 update, set the mda7_flg so that if	#
21422# an access error exception occurs later in emulation, this address	#
21423# register update can be undone.					#
21424#									#
21425#########################################################################
21426
21427	global		dec_areg
21428dec_areg:
21429	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
21430	jmp		(tbl_dareg.b,%pc,%d1.w*1)
21431
21432tbl_dareg:
21433	short		dareg0 - tbl_dareg
21434	short		dareg1 - tbl_dareg
21435	short		dareg2 - tbl_dareg
21436	short		dareg3 - tbl_dareg
21437	short		dareg4 - tbl_dareg
21438	short		dareg5 - tbl_dareg
21439	short		dareg6 - tbl_dareg
21440	short		dareg7 - tbl_dareg
21441
21442dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
21443	rts
21444dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
21445	rts
21446dareg2:	sub.l		%d0,%a2
21447	rts
21448dareg3:	sub.l		%d0,%a3
21449	rts
21450dareg4:	sub.l		%d0,%a4
21451	rts
21452dareg5:	sub.l		%d0,%a5
21453	rts
21454dareg6:	sub.l		%d0,(%a6)
21455	rts
21456dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
21457	cmpi.b		%d0,&0x1
21458	beq.b		dareg7b
21459	sub.l		%d0,EXC_A7(%a6)
21460	rts
21461dareg7b:
21462	subq.l		&0x2,EXC_A7(%a6)
21463	rts
21464
21465##############################################################################
21466
21467#########################################################################
21468# XDEF ****************************************************************	#
21469#	load_fpn1(): load FP register value into FP_SRC(a6).		#
21470#									#
21471# XREF ****************************************************************	#
21472#	None								#
21473#									#
21474# INPUT ***************************************************************	#
21475#	d0 = index of FP register to load				#
21476# 									#
21477# OUTPUT **************************************************************	#
21478#	FP_SRC(a6) = value loaded from FP register file			#
21479#									#
21480# ALGORITHM ***********************************************************	#
21481#	Using the index in d0, load FP_SRC(a6) with a number from the 	#
21482# FP register file.							#
21483#									#
21484#########################################################################
21485
21486	global 		load_fpn1
21487load_fpn1:
21488	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
21489	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)
21490
21491tbl_load_fpn1:
21492	short		load_fpn1_0 - tbl_load_fpn1
21493	short		load_fpn1_1 - tbl_load_fpn1
21494	short		load_fpn1_2 - tbl_load_fpn1
21495	short		load_fpn1_3 - tbl_load_fpn1
21496	short		load_fpn1_4 - tbl_load_fpn1
21497	short		load_fpn1_5 - tbl_load_fpn1
21498	short		load_fpn1_6 - tbl_load_fpn1
21499	short		load_fpn1_7 - tbl_load_fpn1
21500
21501load_fpn1_0:
21502	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21503	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21504	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21505	lea		FP_SRC(%a6), %a0
21506	rts
21507load_fpn1_1:
21508	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21509	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21510	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21511	lea		FP_SRC(%a6), %a0
21512	rts
21513load_fpn1_2:
21514	fmovm.x		&0x20, FP_SRC(%a6)
21515	lea		FP_SRC(%a6), %a0
21516	rts
21517load_fpn1_3:
21518	fmovm.x		&0x10, FP_SRC(%a6)
21519	lea		FP_SRC(%a6), %a0
21520	rts
21521load_fpn1_4:
21522	fmovm.x		&0x08, FP_SRC(%a6)
21523	lea		FP_SRC(%a6), %a0
21524	rts
21525load_fpn1_5:
21526	fmovm.x		&0x04, FP_SRC(%a6)
21527	lea		FP_SRC(%a6), %a0
21528	rts
21529load_fpn1_6:
21530	fmovm.x		&0x02, FP_SRC(%a6)
21531	lea		FP_SRC(%a6), %a0
21532	rts
21533load_fpn1_7:
21534	fmovm.x		&0x01, FP_SRC(%a6)
21535	lea		FP_SRC(%a6), %a0
21536	rts
21537
21538#############################################################################
21539
21540#########################################################################
21541# XDEF ****************************************************************	#
21542#	load_fpn2(): load FP register value into FP_DST(a6).		#
21543#									#
21544# XREF ****************************************************************	#
21545#	None								#
21546#									#
21547# INPUT ***************************************************************	#
21548#	d0 = index of FP register to load				#
21549# 									#
21550# OUTPUT **************************************************************	#
21551#	FP_DST(a6) = value loaded from FP register file			#
21552#									#
21553# ALGORITHM ***********************************************************	#
21554#	Using the index in d0, load FP_DST(a6) with a number from the 	#
21555# FP register file.							#
21556#									#
21557#########################################################################
21558
21559	global		load_fpn2
21560load_fpn2:
21561	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
21562	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)
21563
21564tbl_load_fpn2:
21565	short		load_fpn2_0 - tbl_load_fpn2
21566	short		load_fpn2_1 - tbl_load_fpn2
21567	short		load_fpn2_2 - tbl_load_fpn2
21568	short		load_fpn2_3 - tbl_load_fpn2
21569	short		load_fpn2_4 - tbl_load_fpn2
21570	short		load_fpn2_5 - tbl_load_fpn2
21571	short		load_fpn2_6 - tbl_load_fpn2
21572	short		load_fpn2_7 - tbl_load_fpn2
21573
21574load_fpn2_0:
21575	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
21576	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
21577	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
21578	lea		FP_DST(%a6), %a0
21579	rts
21580load_fpn2_1:
21581	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
21582	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
21583	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
21584	lea		FP_DST(%a6), %a0
21585	rts
21586load_fpn2_2:
21587	fmovm.x		&0x20, FP_DST(%a6)
21588	lea		FP_DST(%a6), %a0
21589	rts
21590load_fpn2_3:
21591	fmovm.x		&0x10, FP_DST(%a6)
21592	lea		FP_DST(%a6), %a0
21593	rts
21594load_fpn2_4:
21595	fmovm.x		&0x08, FP_DST(%a6)
21596	lea		FP_DST(%a6), %a0
21597	rts
21598load_fpn2_5:
21599	fmovm.x		&0x04, FP_DST(%a6)
21600	lea		FP_DST(%a6), %a0
21601	rts
21602load_fpn2_6:
21603	fmovm.x		&0x02, FP_DST(%a6)
21604	lea		FP_DST(%a6), %a0
21605	rts
21606load_fpn2_7:
21607	fmovm.x		&0x01, FP_DST(%a6)
21608	lea		FP_DST(%a6), %a0
21609	rts
21610
21611#############################################################################
21612
21613#########################################################################
21614# XDEF ****************************************************************	#
21615# 	store_fpreg(): store an fp value to the fpreg designated d0.	#
21616#									#
21617# XREF ****************************************************************	#
21618#	None								#
21619#									#
21620# INPUT ***************************************************************	#
21621#	fp0 = extended precision value to store				#
21622#	d0  = index of floating-point register				#
21623# 									#
21624# OUTPUT **************************************************************	#
21625#	None								#
21626#									#
21627# ALGORITHM ***********************************************************	#
21628#	Store the value in fp0 to the FP register designated by the	#
21629# value in d0. The FP number can be DENORM or SNAN so we have to be	#
21630# careful that we don't take an exception here.				#
21631#									#
21632#########################################################################
21633
21634	global		store_fpreg
21635store_fpreg:
21636	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
21637	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)
21638
21639tbl_store_fpreg:
21640	short		store_fpreg_0 - tbl_store_fpreg
21641	short		store_fpreg_1 - tbl_store_fpreg
21642	short		store_fpreg_2 - tbl_store_fpreg
21643	short		store_fpreg_3 - tbl_store_fpreg
21644	short		store_fpreg_4 - tbl_store_fpreg
21645	short		store_fpreg_5 - tbl_store_fpreg
21646	short		store_fpreg_6 - tbl_store_fpreg
21647	short		store_fpreg_7 - tbl_store_fpreg
21648
21649store_fpreg_0:
21650	fmovm.x		&0x80, EXC_FP0(%a6)
21651	rts
21652store_fpreg_1:
21653	fmovm.x		&0x80, EXC_FP1(%a6)
21654	rts
21655store_fpreg_2:
21656	fmovm.x 	&0x01, -(%sp)
21657	fmovm.x		(%sp)+, &0x20
21658	rts
21659store_fpreg_3:
21660	fmovm.x 	&0x01, -(%sp)
21661	fmovm.x		(%sp)+, &0x10
21662	rts
21663store_fpreg_4:
21664	fmovm.x 	&0x01, -(%sp)
21665	fmovm.x		(%sp)+, &0x08
21666	rts
21667store_fpreg_5:
21668	fmovm.x 	&0x01, -(%sp)
21669	fmovm.x		(%sp)+, &0x04
21670	rts
21671store_fpreg_6:
21672	fmovm.x 	&0x01, -(%sp)
21673	fmovm.x		(%sp)+, &0x02
21674	rts
21675store_fpreg_7:
21676	fmovm.x 	&0x01, -(%sp)
21677	fmovm.x		(%sp)+, &0x01
21678	rts
21679
21680#########################################################################
21681# XDEF ****************************************************************	#
21682# 	_denorm(): denormalize an intermediate result			#
21683#									#
21684# XREF ****************************************************************	#
21685#	None								#
21686#									#
21687# INPUT *************************************************************** #
21688#	a0 = points to the operand to be denormalized			#
21689#		(in the internal extended format)			#
21690#		 							#
21691#	d0 = rounding precision						#
21692#									#
21693# OUTPUT **************************************************************	#
21694#	a0 = pointer to the denormalized result				#
21695#		(in the internal extended format)			#
21696#									#
21697#	d0 = guard,round,sticky						#
21698#									#
21699# ALGORITHM ***********************************************************	#
21700# 	According to the exponent underflow threshold for the given	#
21701# precision, shift the mantissa bits to the right in order raise the	#
21702# exponent of the operand to the threshold value. While shifting the 	#
21703# mantissa bits right, maintain the value of the guard, round, and 	#
21704# sticky bits.								#
21705# other notes:								#
21706#	(1) _denorm() is called by the underflow routines		#
21707#	(2) _denorm() does NOT affect the status register		#
21708#									#
21709#########################################################################
21710
21711#
21712# table of exponent threshold values for each precision
21713#
# Exponent threshold per rounding precision, indexed by (d0 >> 2).
# NOTE(review): index 0 is presumably extended precision - confirm.
tbl_thresh:
	short		0x0			# index 0
	short		sgl_thresh		# index 1: single
	short		dbl_thresh		# index 2: double

	global		_denorm
# _denorm: raise the operand's exponent to the threshold for the selected
# precision, shifting the mantissa right to compensate.
#   in:  a0 = operand (internal extended format), d0 = rounding precision
#   out: operand at (a0) updated, d0 = guard/round/sticky in bits 31:29
#   clobbers: d1
_denorm:
	lsr.b		&2, %d0			# precision -> table index
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # d1 = threshold
	mov.w		%d1, %d0
	sub.w		FTEMP_EX(%a0), %d0	# d0 = threshold - exponent
	cmpi.w		%d0, &66		# shift of 66 or more?
	bpl.b		denorm_set_stky		# yes; nothing of the mantissa survives

# Shift is small enough to matter: seed g,r,s and let dnrm_lp do the work.
	clr.l		%d0			# g,r,s = 0
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # INEX2 already recorded?
	beq.b		denorm_call		# no; keep g,r,s clear
	bset		&29, %d0		# yes; start with sticky set

denorm_call:
	bsr.l		dnrm_lp			# d1 = threshold, d0 = initial g,r,s
	rts

# Every mantissa bit would be shifted out: the result is a zero mantissa
# at the threshold exponent with only the sticky bit set.
denorm_set_stky:
	mov.w		%d1, FTEMP_EX(%a0)	# exponent = threshold
	mov.l		&0x20000000, %d0	# return value: sticky only
	clr.l		FTEMP_HI(%a0)		# ms mantissa = 0
	clr.l		FTEMP_LO(%a0)		# ls mantissa = 0
	rts
21753
21754#									#
# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
21756#									#
21757# INPUT:								#
21758#	%a0	   : points to the operand to be denormalized		#
21759#	%d0{31:29} : initial guard,round,sticky				#
21760#	%d1{15:0}  : denormalization threshold				#
21761# OUTPUT:								#
21762#	%a0	   : points to the denormalized operand		 	#
21763#	%d0{31:29} : final guard,round,sticky				#
21764#									#
21765
21766# *** Local Equates *** #
21767set	GRS,		L_SCR2			# g,r,s temp storage
21768set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
21769
	global		dnrm_lp
# dnrm_lp entry: stash working copies, compute the shift amount
# n = threshold - exponent, and dispatch on its range:
#   n <= 0        -> dnrm_no_lp (operand untouched, g,r,s returned as passed)
#   0  <  n < 32  -> case_1
#   32 <= n < 64  -> case_2
#   n >= 64       -> case_3
# On dispatch, d0 = threshold and d1 = n.
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
# (assumes GRS/L_SCR2 immediately follows FTEMP_LO2/L_SCR1 in the frame -
# TODO confirm against the frame layout)
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0
	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No denormalization necessary: exponent is already at or above the threshold
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0 		# restore original g,r,s
	rts
21799
21800#
21801# case (0<d1<32)
21802#
21803# %d0 = denorm threshold
21804# %d1 = "n" = amt to shift
21805#
21806#	---------------------------------------------------------
21807#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
21808#	---------------------------------------------------------
21809#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21810#	\	   \		      \			 \
21811#	 \	    \		       \		  \
21812#	  \	     \			\		   \
21813#	   \	      \			 \		    \
21814#	    \	       \		  \		     \
21815#	     \		\		   \		      \
21816#	      \		 \		    \		       \
21817#	       \	  \		     \			\
21818#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21819#	---------------------------------------------------------
21820#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
21821#	---------------------------------------------------------
21822#
# case_1: shift right by n (0 < n < 32).
#   in:  d0 = denorm threshold, d1 = n, a0 = operand
#   out: operand exponent/mantissa updated, d0 = g,r,s in bits 31:29
# d2 is used as scratch and preserved via the stack.
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# for n >= 29 the 32-bit window extracted below ends before the last of the
# original g,r,s bits, so fold them into the low byte of the FTEMP_LO copy
# where the window still covers them.
	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

# three extractions over adjacent longwords, each offset by (32 - n)
case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

# any nonzero bit in d0 bits 29:0 becomes sticky
	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts
21851
21852#
21853# case (32<=d1<64)
21854#
21855# %d0 = denorm threshold
21856# %d1 = "n" = amt to shift
21857#
21858#	---------------------------------------------------------
21859#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
21860#	---------------------------------------------------------
21861#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21862#	\	   \		      \
21863#	 \	    \		       \
21864#	  \	     \			-------------------
21865#	   \	      --------------------		   \
21866#	    -------------------	  	  \		    \
21867#	     		       \	   \		     \
21868#	      		 	\     	    \		      \
21869#	       		  	 \	     \		       \
21870#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21871#	---------------------------------------------------------
21872#	|0...............0|0....0| NEW_LO     |grs		|
21873#	---------------------------------------------------------
21874#
# case_2: shift right by n (32 <= n < 64).
#   in:  d0 = denorm threshold, d1 = n, a0 = operand
#   out: FTEMP_HI = 0, FTEMP_LO = surviving bits of the old FTEMP_HI,
#        d0 = g,r,s in bits 31:29
# d2 is used as scratch and preserved via the stack.
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 31
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

# sticky is set if anything is nonzero in d1 bits 29:0 or in the tested
# field of the FTEMP_LO copy
	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts
21911
21912#
21913# case (d1>=64)
21914#
21915# %d0 = denorm threshold
21916# %d1 = amt to shift
21917#
# case_3: shift right by n (n >= 64).
#   in:  d0 = denorm threshold, d1 = n, a0 = operand
# n == 64 and n == 65 are handled at case3_64 / case3_65; for anything
# larger the result is a zero mantissa with only sticky set in d0.
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts
21935
21936#
21937# case (d1 == 64)
21938#
21939#	---------------------------------------------------------
21940#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
21941#	---------------------------------------------------------
21942#	<-------(32)------>
21943#	\	   	   \
21944#	 \	    	    \
21945#	  \	     	     \
21946#	   \	      	      ------------------------------
21947#	    -------------------------------		    \
21948#	     		       		   \		     \
21949#	      		 	     	    \		      \
21950#	       		  	 	     \		       \
21951#					      <-------(32)------>
21952#	---------------------------------------------------------
21953#	|0...............0|0................0|grs		|
21954#	---------------------------------------------------------
21955#
# case3_64: the two most significant bits of hi(mantissa) become G and R
# in %d0; the remaining 30 bits are isolated in %d1 so that the condition
# codes left by the final "and" tell case3_complete whether any of them
# were set (bra does not alter the condition codes).
21956case3_64:
21957	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
21958	mov.l		%d0, %d1		# make a copy
21959	and.l		&0xc0000000, %d0	# extract G,R
21960	and.l		&0x3fffffff, %d1	# extract other bits (sets Z for case3_complete)
21961
21962	bra.b		case3_complete
21963
21964#
21965# case (d1 == 65)
21966#
21967#	---------------------------------------------------------
21968#	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
21969#	---------------------------------------------------------
21970#	<-------(32)------>
21971#	\	   	   \
21972#	 \	    	    \
21973#	  \	     	     \
21974#	   \	      	      ------------------------------
21975#	    --------------------------------		    \
21976#	     		       		    \		     \
21977#	      		 	     	     \		      \
21978#	       		  	 	      \		       \
21979#					       <-------(31)----->
21980#	---------------------------------------------------------
21981#	|0...............0|0................0|0rs		|
21982#	---------------------------------------------------------
21983#
# case3_65: the most significant bit of hi(mantissa) becomes R (G is 0);
# the remaining 31 bits of hi(mantissa) are isolated in %d1 so that the
# condition codes left by the final "and" tell case3_complete whether any
# of them were set.
21984case3_65:
21985	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy (as case3_64 does); without
						# it %d1 still holds the shift count (65)
						# and sticky would always be set
21986	and.l		&0x80000000, %d0	# extract R bit
21987	lsr.l		&0x1, %d0		# shift high bit into R bit
21988	and.l		&0x7fffffff, %d1	# extract other bits (sets Z for case3_complete)
21989
21990case3_complete:
21991# last operation done was an "and" of the bits shifted off so the condition
21992# codes are already set so branch accordingly.
21993	bne.b		case3_set_sticky	# yes; go set new sticky
21994	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
21995	bne.b		case3_set_sticky	# yes; go set new sticky
21996	tst.b		GRS(%a6)		# were any bits shifted off?
21997	bne.b		case3_set_sticky	# yes; go set new sticky
21998
21999#
22000# no bits were shifted off below the new guard/round bits so don't set
22001# the sticky bit. %d0 already holds the new guard and round bits.
22002# the entire mantissa is zero.
22003#
22004	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
22005	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
22006	rts
22007
22008#
22009# some bits were shifted off so set the sticky bit.
22010# the entire mantissa is zero.
22011#
22012case3_set_sticky:
22013	bset		&rnd_stky_bit,%d0	# set new sticky bit
22014	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
22015	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
22016	rts
22017
22018#########################################################################
22019# XDEF ****************************************************************	#
22020#	_round(): round result according to precision/mode		#
22021#									#
22022# XREF ****************************************************************	#
22023#	None								#
22024#									#
22025# INPUT ***************************************************************	#
22026#	a0	  = ptr to input operand in internal extended format 	#
22027#	d1(hi)    = contains rounding precision:			#
22028#			ext = $0000xxxx					#
22029#			sgl = $0004xxxx					#
22030#			dbl = $0008xxxx					#
22031#	d1(lo)	  = contains rounding mode:				#
22032#			RN  = $xxxx0000					#
22033#			RZ  = $xxxx0001					#
22034#			RM  = $xxxx0002					#
22035#			RP  = $xxxx0003					#
22036#	d0{31:29} = contains the g,r,s bits (extended)			#
22037#									#
22038# OUTPUT **************************************************************	#
22039#	a0 = pointer to rounded result					#
22040#									#
22041# ALGORITHM ***********************************************************	#
22042#	On return the value pointed to by a0 is correctly rounded,	#
22043#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
22044#	The result is not typed - the tag field is invalid.  The	#
22045#	result is still in the internal extended format.		#
22046#									#
22047#	The INEX bit of USER_FPSR will be set if the rounded result was	#
22048#	inexact (i.e. if any of the g-r-s bits were set).		#
22049#									#
22050#########################################################################
22051
# _round entry: reduce g,r,s to the selected precision via ext_grs, finish
# immediately through "truncate" when they are all zero, otherwise flag the
# result inexact and dispatch on the rounding mode held in the low word of %d1.
# %a1 is overwritten with the 16-bit table offset.
22052	global		_round
22053_round:
22054#
22055# ext_grs() looks at the rounding precision and sets the appropriate
22056# G,R,S bits.
22057# If (G,R,S == 0) then result is exact and round is done, else set
22058# the inex flag in status reg and continue.
22059#
22060	bsr.l		ext_grs			# extract G,R,S
22061
22062	tst.l		%d0			# are G,R,S zero?
22063	beq.w		truncate		# yes; round is complete
22064
22065	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22066
22067#
22068# Use rounding mode as an index into a jump table for these modes.
22069# All of the following assumes grs != 0.
22070#
22071	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
22072	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
22073
# Offsets are relative to tbl_mode and ordered by rounding mode value:
# 0 = RN, 1 = RZ, 2 = RM, 3 = RP.
22074tbl_mode:
22075	short		rnd_near - tbl_mode
22076	short		truncate - tbl_mode	# RZ always truncates
22077	short		rnd_mnus - tbl_mode
22078	short		rnd_plus - tbl_mode
22079
22080#################################################################
22081#	ROUND PLUS INFINITY					#
22082#								#
22083#	If sign of fp number = 0 (positive), then add 1 to l.	#
22084#################################################################
# rnd_plus: round toward plus infinity. A negative operand is truncated;
# a positive one has 1 added at the l-bit of the selected precision.
# %d0 is forced non-zero so the add_* routines skip their "clear the l-bit"
# tie handling. %d1 is swapped so its low byte holds the rounding precision.
22085rnd_plus:
22086	tst.b		FTEMP_SGN(%a0)		# check for sign
22087	bmi.w		truncate		# if negative then truncate
22088
22089	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
22090	swap		%d1			# set up d1 for round prec.
22091
22092	cmpi.b		%d1, &s_mode		# is prec = sgl?
22093	beq.w		add_sgl			# yes
22094	bgt.w		add_dbl			# no; it's dbl
22095	bra.w		add_ext			# no; it's ext
22096
22097#################################################################
22098#	ROUND MINUS INFINITY					#
22099#								#
22100#	If sign of fp number = 1 (negative), then add 1 to l.	#
22101#################################################################
# rnd_mnus: round toward minus infinity. A positive operand is truncated;
# a negative one has 1 added at the l-bit of the selected precision.
# %d0 is forced non-zero so the add_* routines skip their "clear the l-bit"
# tie handling. %d1 is swapped so its low byte holds the rounding precision.
22102rnd_mnus:
22103	tst.b		FTEMP_SGN(%a0)		# check for sign
22104	bpl.w		truncate		# if positive then truncate
22105
22106	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
22107	swap		%d1			# set up d1 for round prec.
22108
22109	cmpi.b		%d1, &s_mode		# is prec = sgl?
22110	beq.w		add_sgl			# yes
22111	bgt.w		add_dbl			# no; it's dbl
22112	bra.w		add_ext			# no; it's ext
22113
22114#################################################################
22115#	ROUND NEAREST						#
22116#								#
22117#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
22118#	Note that this will round to even in case of a tie.	#
22119#################################################################
# rnd_near: round to nearest. The guard bit is shifted out of %d0 into the
# carry; what remains in %d0 is r,s, which the add_* routines test to detect
# a tie (r = s = 0) and then clear the l-bit (round to even).
22120rnd_near:
22121	asl.l		&0x1, %d0		# shift g-bit to c-bit
22122	bcc.w		truncate		# g = 0: below half, just truncate
22123
22124	swap		%d1			# set up d1 for round prec.
22125
22126	cmpi.b		%d1, &s_mode		# is prec = sgl?
22127	beq.w		add_sgl			# yes
22128	bgt.w		add_dbl			# no; it's dbl
22129	bra.w		add_ext			# no; it's ext
22130
22131# *** LOCAL EQUATES ***
22132set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
22133set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
22134
22135#########################
22136#	ADD SINGLE	#
22137#########################
# add_sgl: add 1 at the single-precision l-bit of FTEMP_HI. On carry out
# the two roxr.w instructions rotate the extend bit back in through the
# high then low word of FTEMP_HI (a one-bit right shift of hi(mantissa))
# and the exponent is incremented. If %d0 is zero (tie in round-nearest)
# the l-bit is cleared. sgl_done is also entered directly from "truncate".
22138add_sgl:
22139	add.l		&ad_1_sgl, FTEMP_HI(%a0)
22140	bcc.b		scc_clr			# no mantissa overflow
22141	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
22142	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
22143	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
22144scc_clr:
22145	tst.l		%d0			# test for rs = 0
22146	bne.b		sgl_done
22147	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22148sgl_done:
22149	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22150	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
22151	rts
22152
22153#########################
22154#	ADD EXTENDED	#
22155#########################
# add_ext: add 1 at the least significant bit of the 64-bit mantissa
# (FTEMP_HI:FTEMP_LO). If the carry propagates out of FTEMP_HI, the four
# roxr.w instructions rotate the extend bit back in across all four
# mantissa words (a one-bit right shift) and the exponent is incremented.
# If %d0 is zero (tie in round-nearest) the l-bit is cleared.
22156add_ext:
22157	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
22158	bcc.b		xcc_clr			# test for carry out
22159	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
22160	bcc.b		xcc_clr
22161	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
22162	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
22163	roxr.w		FTEMP_LO(%a0)
22164	roxr.w		FTEMP_LO+2(%a0)
22165	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
22166xcc_clr:
22167	tst.l		%d0			# test rs = 0
22168	bne.b		add_ext_done
22169	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
22170add_ext_done:
22171	rts
22172
22173#########################
22174#	ADD DOUBLE	#
22175#########################
# add_dbl: add 1 at the double-precision l-bit (ad_1_dbl) of FTEMP_LO and
# propagate the carry into FTEMP_HI. On carry out of FTEMP_HI the four
# roxr.w instructions rotate the extend bit back in across the mantissa
# and the exponent is incremented. If %d0 is zero (tie in round-nearest)
# the l-bit is cleared. dbl_done is also entered directly from "truncate".
22176add_dbl:
22177	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22178	bcc.b		dcc_clr			# no carry
22179	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
22180	bcc.b		dcc_clr			# no carry
22181
22182	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
22183	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
22184	roxr.w		FTEMP_LO(%a0)
22185	roxr.w		FTEMP_LO+2(%a0)
22186	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
22187dcc_clr:
22188	tst.l		%d0			# test for rs = 0
22189	bne.b		dbl_done
22190	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22191
22192dbl_done:
22193	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22194	rts
22195
22196###########################
22197# Truncate all other bits #
22198###########################
# truncate: discard mantissa bits below the selected precision without
# rounding up. Reuses the masking tails of add_sgl/add_dbl; extended
# precision needs no masking. %d1 is left swapped (precision in the low
# word) on return.
22199truncate:
22200	swap		%d1			# select rnd prec
22201
22202	cmpi.b		%d1, &s_mode		# is prec sgl?
22203	beq.w		sgl_done		# yes
22204	bgt.b		dbl_done		# no; it's dbl
22205	rts					# no; it's ext
22206
22207
22208#
22209# ext_grs(): extract guard, round and sticky bits according to
22210#	     rounding precision.
22211#
22212# INPUT
22213#	d0	   = extended precision g,r,s (in d0{31:29})
22214#	d1 	   = {PREC,ROUND}
22215# OUTPUT
22216#	d0{31:29}  = guard, round, sticky
22217#
22218# ext_grs() extracts the guard/round/sticky bits according to the
22219# selected rounding precision. It is called by the round subroutine
22220# only.  All registers except d0 are kept intact. d0 returns the
22221# updated guard, round and sticky bits in d0{31:29}.
22222#
22223# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22224#	 prior to usage, and needs to restore d1 to original. this
22225#	 routine is tightly tied to the round routine and not meant to
22226#	 uphold standard subroutine calling practices.
22227#
22228
# ext_grs: for extended precision %d0 is returned untouched. For single or
# double precision the new guard/round bits are the two mantissa bits just
# below the precision's l-bit, and the new sticky is the OR of every lower
# mantissa bit together with the incoming extended g,r,s in %d0.
# %d2/%d3 are used as scratch and saved/restored on the stack; %d1 is
# swapped on entry and swapped back before every return.
22229ext_grs:
22230	swap		%d1			# have d1.w point to round precision
22231	tst.b		%d1			# is rnd prec = extended?
22232	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
22233
22234#
22235# %d0 already holds g,r,s since _round() had it before calling
22236# this function. so, as long as we don't disturb it, we are "returning" it.
22237#
22238ext_grs_ext:
22239	swap		%d1			# yes; return to correct positions
22240	rts
22241
22242ext_grs_not_ext:
22243	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
22244
22245	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
22246	bne.b		ext_grs_dbl		# no; go handle dbl
22247
22248#
22249# sgl:
22250#	96		64	  40	32		0
22251#	-----------------------------------------------------
22252#	| EXP	|XXXXXXX|	  |xx	|		|grs|
22253#	-----------------------------------------------------
22254#			<--(24)--->nn\			   /
22255#				   ee ---------------------
22256#				   ww		|
22257#						v
22258#				   gr	   new sticky
22259#
22260ext_grs_sgl:
22261	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22262	mov.l		&30, %d2		# of the sgl prec. limits
22263	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
22264	mov.l		FTEMP_HI(%a0), %d2	# get hi(mantissa) for s-bit test
22265	and.l		&0x0000003f, %d2	# s bit is the or of all other
22266	bne.b		ext_grs_st_stky		# bits to the right of g-r
22267	tst.l		FTEMP_LO(%a0)		# test lower mantissa
22268	bne.b		ext_grs_st_stky		# if any are set, set sticky
22269	tst.l		%d0			# test original g,r,s
22270	bne.b		ext_grs_st_stky		# if any are set, set sticky
22271	bra.b		ext_grs_end_sd		# nothing below g-r is set; exit
22272
22273#
22274# dbl:
22275#	96		64	  	32	 11	0
22276#	-----------------------------------------------------
22277#	| EXP	|XXXXXXX|	  	|	 |xx	|grs|
22278#	-----------------------------------------------------
22279#						  nn\	    /
22280#						  ee -------
22281#						  ww	|
22282#							v
22283#						  gr	new sticky
22284#
22285ext_grs_dbl:
22286	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22287	mov.l		&30, %d2		# of the dbl prec. limits
22288	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
22289	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
22290	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
22291	bne.b		ext_grs_st_stky		# other bits to the right of g-r
22292	tst.l		%d0			# test original g,r,s
22293	bne.b		ext_grs_st_stky		# if any are set, set sticky
22294	bra.b		ext_grs_end_sd		# if clear, exit
22295
22296ext_grs_st_stky:
22297	bset		&rnd_stky_bit, %d3	# set sticky bit
22298ext_grs_end_sd:
22299	mov.l		%d3, %d0		# return grs to d0
22300
22301	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
22302
22303	swap		%d1			# restore d1 to original
22304	rts
22305
22306#########################################################################
22307# norm(): normalize the mantissa of an extended precision input. the	#
22308#	  input operand should not be normalized already.		#
22309#									#
22310# XDEF ****************************************************************	#
22311#	norm()								#
22312#									#
22313# XREF **************************************************************** #
22314#	none								#
22315#									#
22316# INPUT *************************************************************** #
22317#	a0 = pointer fp extended precision operand to normalize		#
22318#									#
22319# OUTPUT ************************************************************** #
22320# 	d0 = number of bit positions the mantissa was shifted		#
22321#	a0 = the input operand's mantissa is normalized; the exponent	#
22322#	     is unchanged.						#
22323#									#
22324#########################################################################
# norm: shift the 64-bit mantissa FTEMP_HI:FTEMP_LO left until its most
# significant set bit reaches bit 63, and return the shift count in %d0.
# %d1 is overwritten; %d2/%d3 are saved and restored on the stack.
# The exponent field is not touched.
22325	global		norm
22326norm:
22327	mov.l		%d2, -(%sp)		# create some temp regs
22328	mov.l		%d3, -(%sp)
22329
22330	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
22331	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
22332
22333	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
22334	beq.b		norm_lo			# hi(man) is all zeroes!
22335
# hi(man) has a set bit: shift both halves left by %d2 and move the top
# %d2 bits of lo(man) into the vacated low bits of hi(man).
22336norm_hi:
22337	lsl.l		%d2, %d0		# left shift hi(man)
22338	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
22339
22340	or.l		%d3, %d0		# create hi(man)
22341	lsl.l		%d2, %d1		# create lo(man)
22342
22343	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
22344	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
22345
22346	mov.l		%d2, %d0		# return shift amount
22347
22348	mov.l		(%sp)+, %d3		# restore temp regs
22349	mov.l		(%sp)+, %d2
22350
22351	rts
22352
# hi(man) is zero: normalize lo(man) on its own, store it as the new
# hi(man), and report the shift as 32 + the shift applied to lo(man).
22353norm_lo:
22354	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
22355	lsl.l		%d2, %d1		# shift lo(man)
22356	add.l		&32, %d2		# add 32 to shft amount
22357
22358	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
22359	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
22360
22361	mov.l		%d2, %d0		# return shift amount
22362
22363	mov.l		(%sp)+, %d3		# restore temp regs
22364	mov.l		(%sp)+, %d2
22365
22366	rts
22367
22368#########################################################################
22369# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
22370#		- returns corresponding optype tag			#
22371#									#
22372# XDEF ****************************************************************	#
22373#	unnorm_fix()							#
22374#									#
22375# XREF **************************************************************** #
22376#	norm() - normalize the mantissa					#
22377#									#
22378# INPUT *************************************************************** #
22379#	a0 = pointer to unnormalized extended precision number		#
22380#									#
22381# OUTPUT ************************************************************** #
22382#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
22383#	a0 = input operand has been converted to a norm, denorm, or	#
22384#	     zero; both the exponent and mantissa are changed.		#
22385#									#
22386#########################################################################
22387
# unnorm_fix: count the leading zero bits of the 64-bit mantissa, then
#   - all zero           -> clear the exponent (sign kept), return ZERO
#   - shift <= exponent  -> subtract the shift from the exponent, call
#                           norm() to shift the mantissa, return NORM
#   - shift >  exponent  -> shift the mantissa left by the exponent value
#                           only, clear the exponent, return DENORM
# The tag is returned in %d0.b; %d1 is overwritten.
22388	global		unnorm_fix
22389unnorm_fix:
22390	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22391	bne.b		unnorm_shift		# hi(man) is not all zeroes
22392
22393#
22394# hi(man) is all zeroes so see if any bits in lo(man) are set
22395#
22396unnorm_chk_lo:
22397	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22398	beq.w		unnorm_zero		# yes
22399
22400	add.w		&32, %d0		# no; fix shift distance
22401
22402#
22403# d0 = # shifts needed for complete normalization
22404#
22405unnorm_shift:
22406	clr.l		%d1			# clear top word
22407	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
22408	and.w		&0x7fff, %d1		# strip off sgn
22409
22410	cmp.w		%d0, %d1		# will denorm push exp < 0?
22411	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
22412
22413#
22414# exponent would not go < 0. therefore, number stays normalized
22415#
22416	sub.w		%d0, %d1		# shift exponent value
22417	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
22418	and.w		&0x8000, %d0		# save old sign
22419	or.w		%d0, %d1		# {sgn,new exp}
22420	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
22421
22422	bsr.l		norm			# normalize UNNORM
22423
22424	mov.b		&NORM, %d0		# return new optype tag
22425	rts
22426
22427#
22428# exponent would go < 0, so only denormalize until exp = 0
22429# (%d1 = exponent value, which is smaller than the shift count in %d0)
22430unnorm_nrm_zero:
22431	cmp.b		%d1, &32		# is exp <= 32?
22432	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
22433
22434	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22435	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
22436
22437	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
22438	lsl.l		%d1, %d0		# extract new lo(man)
22439	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
22440
22441	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
22442
22443	mov.b		&DENORM, %d0		# return new optype tag
22444	rts
22445
22446#
22447# only mantissa bits set are in lo(man)
22448#
22449unnorm_nrm_zero_lrg:
22450	sub.w		&32, %d1		# adjust shft amt by 32
22451
22452	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
22453	lsl.l		%d1, %d0		# left shift lo(man)
22454
22455	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
22456	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
22457
22458	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
22459
22460	mov.b		&DENORM, %d0		# return new optype tag
22461	rts
22462
22463#
22464# whole mantissa is zero so this UNNORM is actually a zero
22465#
22466unnorm_zero:
22467	and.w		&0x8000, FTEMP_EX(%a0) 	# force exponent to zero
22468
22469	mov.b		&ZERO, %d0		# fix optype tag
22470	rts
22471
22472#########################################################################
22473# XDEF ****************************************************************	#
22474# 	set_tag_x(): return the optype of the input ext fp number	#
22475#									#
22476# XREF ****************************************************************	#
22477#	None								#
22478#									#
22479# INPUT ***************************************************************	#
22480#	a0 = pointer to extended precision operand			#
22481# 									#
22482# OUTPUT **************************************************************	#
22483#	d0 = value of type tag						#
22484# 		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
22485#									#
22486# ALGORITHM ***********************************************************	#
22487#	Simply test the exponent, j-bit, and mantissa values to 	#
22488# determine the type of operand.					#
22489#	If it's an unnormalized zero, alter the operand and force it	#
22490# to be a normal zero.							#
22491#									#
22492#########################################################################
22493
# set_tag_x: classify the extended-precision operand at (%a0) and return
# the tag in %d0.b. Decision order:
#   exponent == 0x7fff        -> INF, or QNAN/SNAN if any mantissa bit
#                                other than the msb is set
#   mantissa msb set          -> NORM
#   exponent == 0             -> ZERO or DENORM (by mantissa)
#   otherwise (msb clear)     -> UNNORM, except an all-zero mantissa, which
#                                has its exponent cleared and is tagged ZERO
# Only %d0 is modified; the operand is written only in that last case.
22494	global		set_tag_x
22495set_tag_x:
22496	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
22497	andi.w		&0x7fff, %d0		# strip off sign
22498	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
22499	beq.b		inf_or_nan_x
22500not_inf_or_nan_x:
22501	btst		&0x7,FTEMP_HI(%a0)	# msb of mantissa (first byte, bit 7) set?
22502	beq.b		not_norm_x		# no
22503is_norm_x:
22504	mov.b		&NORM, %d0
22505	rts
22506not_norm_x:
22507	tst.w		%d0			# is exponent = 0?
22508	bne.b		is_unnorm_x
22509not_unnorm_x:
22510	tst.l		FTEMP_HI(%a0)		# exponent is 0: any mantissa bits?
22511	bne.b		is_denorm_x
22512	tst.l		FTEMP_LO(%a0)
22513	bne.b		is_denorm_x
22514is_zero_x:
22515	mov.b		&ZERO, %d0
22516	rts
22517is_denorm_x:
22518	mov.b		&DENORM, %d0
22519	rts
22520# must distinguish now "Unnormalized zeroes" which we
22521# must convert to zero.
22522is_unnorm_x:
22523	tst.l		FTEMP_HI(%a0)
22524	bne.b		is_unnorm_reg_x
22525	tst.l		FTEMP_LO(%a0)
22526	bne.b		is_unnorm_reg_x
22527# it's an "unnormalized zero". let's convert it to an actual zero...
22528	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
22529	mov.b		&ZERO, %d0
22530	rts
22531is_unnorm_reg_x:
22532	mov.b		&UNNORM, %d0
22533	rts
22534inf_or_nan_x:
22535	tst.l		FTEMP_LO(%a0)
22536	bne.b		is_nan_x
22537	mov.l		FTEMP_HI(%a0), %d0
22538	and.l		&0x7fffffff, %d0	# msb is a don't care!
22539	bne.b		is_nan_x
22540is_inf_x:
22541	mov.b		&INF, %d0
22542	rts
22543is_nan_x:
22544	btst		&0x6, FTEMP_HI(%a0)	# bit just below the mantissa msb set?
22545	beq.b		is_snan_x		# no; signalling NaN
22546	mov.b		&QNAN, %d0
22547	rts
22548is_snan_x:
22549	mov.b		&SNAN, %d0
22550	rts
22551
22552#########################################################################
22553# XDEF ****************************************************************	#
22554# 	set_tag_d(): return the optype of the input dbl fp number	#
22555#									#
22556# XREF ****************************************************************	#
22557#	None								#
22558#									#
22559# INPUT ***************************************************************	#
22560#	a0 = points to double precision operand				#
22561# 									#
22562# OUTPUT **************************************************************	#
22563#	d0 = value of type tag						#
22564# 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22565#									#
22566# ALGORITHM ***********************************************************	#
22567#	Simply test the exponent, j-bit, and mantissa values to 	#
22568# determine the type of operand.					#
22569#									#
22570#########################################################################
22571
# set_tag_d: classify the double-precision operand at (%a0) and return the
# tag in %d0.b. The first longword supplies the exponent field (mask
# 0x7ff00000) and the upper fraction bits (mask 0x000fffff); the second
# longword at 4+FTEMP(%a0) supplies the rest of the fraction.
# %d1 is overwritten with the masked upper fraction.
22572	global		set_tag_d
22573set_tag_d:
22574	mov.l		FTEMP(%a0), %d0		# fetch first longword
22575	mov.l		%d0, %d1		# keep a copy for the fraction test
22576
22577	andi.l		&0x7ff00000, %d0	# isolate exponent field
22578	beq.b		zero_or_denorm_d	# exponent = 0
22579
22580	cmpi.l		%d0, &0x7ff00000	# exponent all ones?
22581	beq.b		inf_or_nan_d		# yes
22582
22583is_norm_d:
22584	mov.b		&NORM, %d0
22585	rts
22586zero_or_denorm_d:
22587	and.l		&0x000fffff, %d1	# any upper fraction bits?
22588	bne		is_denorm_d
22589	tst.l		4+FTEMP(%a0)		# any lower fraction bits?
22590	bne		is_denorm_d
22591is_zero_d:
22592	mov.b		&ZERO, %d0
22593	rts
22594is_denorm_d:
22595	mov.b		&DENORM, %d0
22596	rts
22597inf_or_nan_d:
22598	and.l		&0x000fffff, %d1	# any upper fraction bits?
22599	bne		is_nan_d
22600	tst.l		4+FTEMP(%a0)		# any lower fraction bits?
22601	bne		is_nan_d
22602is_inf_d:
22603	mov.b		&INF, %d0
22604	rts
22605is_nan_d:
22606	btst		&19, %d1		# most significant fraction bit set?
22607	bne		is_qnan_d		# yes; quiet NaN
22608is_snan_d:
22609	mov.b		&SNAN, %d0
22610	rts
22611is_qnan_d:
22612	mov.b		&QNAN, %d0
22613	rts
22614
22615#########################################################################
22616# XDEF ****************************************************************	#
22617# 	set_tag_s(): return the optype of the input sgl fp number	#
22618#									#
22619# XREF ****************************************************************	#
22620#	None								#
22621#									#
22622# INPUT ***************************************************************	#
22623#	a0 = pointer to single precision operand			#
22624# 									#
22625# OUTPUT **************************************************************	#
22626#	d0 = value of type tag						#
22627# 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
22628#									#
22629# ALGORITHM ***********************************************************	#
22630#	Simply test the exponent, j-bit, and mantissa values to 	#
22631# determine the type of operand.					#
22632#									#
22633#########################################################################
22634
# set_tag_s: classify the single-precision operand at (%a0) and return the
# tag in %d0.b. The exponent field is mask 0x7f800000 and the fraction is
# mask 0x007fffff of the single longword.
# %d1 is overwritten with the masked fraction on the non-NORM paths.
22635	global		set_tag_s
22636set_tag_s:
22637	mov.l		FTEMP(%a0), %d0		# fetch operand
22638	mov.l		%d0, %d1		# keep a copy for the fraction test
22639
22640	andi.l		&0x7f800000, %d0	# isolate exponent field
22641	beq.b		zero_or_denorm_s	# exponent = 0
22642
22643	cmpi.l		%d0, &0x7f800000	# exponent all ones?
22644	beq.b		inf_or_nan_s		# yes
22645
22646is_norm_s:
22647	mov.b		&NORM, %d0
22648	rts
22649zero_or_denorm_s:
22650	and.l		&0x007fffff, %d1	# any fraction bits?
22651	bne		is_denorm_s
22652is_zero_s:
22653	mov.b		&ZERO, %d0
22654	rts
22655is_denorm_s:
22656	mov.b		&DENORM, %d0
22657	rts
22658inf_or_nan_s:
22659	and.l		&0x007fffff, %d1	# any fraction bits?
22660	bne		is_nan_s
22661is_inf_s:
22662	mov.b		&INF, %d0
22663	rts
22664is_nan_s:
22665	btst		&22, %d1		# most significant fraction bit set?
22666	bne		is_qnan_s		# yes; quiet NaN
22667is_snan_s:
22668	mov.b		&SNAN, %d0
22669	rts
22670is_qnan_s:
22671	mov.b		&QNAN, %d0
22672	rts
22673
22674#########################################################################
22675# XDEF ****************************************************************	#
22676# 	unf_res(): routine to produce default underflow result of a 	#
22677#	 	   scaled extended precision number; this is used by 	#
22678#		   fadd/fdiv/fmul/etc. emulation routines.		#
22679# 	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
22680#		    single round prec and extended prec mode.		#
22681#									#
22682# XREF ****************************************************************	#
22683#	_denorm() - denormalize according to scale factor		#
22684# 	_round() - round denormalized number according to rnd prec	#
22685#									#
22686# INPUT ***************************************************************	#
22687#	a0 = pointer to extended precision operand			#
22688#	d0 = scale factor						#
22689#	d1 = rounding precision/mode					#
22690#									#
22691# OUTPUT **************************************************************	#
22692#	a0 = pointer to default underflow result in extended precision	#
22693#	d0.b = result FPSR_cc which caller may or may not want to save	#
22694#									#
22695# ALGORITHM ***********************************************************	#
22696# 	Convert the input operand to "internal format" which means the	#
22697# exponent is extended to 16 bits and the sign is stored in the unused	#
22698# portion of the extended precision operand. Denormalize the number	#
22699# according to the scale factor passed in d0. Then, round the 		#
22700# denormalized result.							#
22701# 	Set the FPSR_exc bits as appropriate but return the cc bits in	#
22702# d0 in case the caller doesn't want to save them (as is the case for	#
22703# fmove out).								#
22704# 	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
22705# precision and the rounding precision to single.			#
22706#									#
22707#########################################################################
# unf_res: build the default underflow result in place at (%a0).
# Stack while _denorm/_round run: (%sp) = saved %a0, 0x4(%sp) = saved %d1
# (so 0x6(%sp) is its low word, holding the rounding precision in bits 7:6
# and the rounding mode in bits 5:4, as the masks below extract them).
# Returns %d0 = 0, or with z_bit set when the rounded mantissa is zero.
# aunfl is set in FPSR_AEXCEPT when inex2 is set in FPSR_EXCEPT.
22708	global		unf_res
22709unf_res:
22710	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
22711
22712	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
22713	sne		FTEMP_SGN(%a0)		# sign byte = 0xff if negative, else 0
22714
22715	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
22716	and.w		&0x7fff, %d1		# strip off sign
22717	sub.w		%d0, %d1		# subtract scale factor
22718	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
22719
22720	mov.l		%a0, -(%sp)		# save operand ptr during calls
22721
22722	mov.l		0x4(%sp),%d0		# pass rnd prec.
22723	andi.w		&0x00c0,%d0		# keep precision bits
22724	lsr.w		&0x4,%d0		# align them for _denorm
22725	bsr.l		_denorm			# denorm result
22726
22727	mov.l		(%sp),%a0		# reload operand ptr
22728	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
22729	andi.w		&0xc0,%d1		# extract rnd prec
22730	lsr.w		&0x4,%d1
22731	swap		%d1			# precision now in hi(d1)
22732	mov.w		0x6(%sp),%d1		# load prec:mode again
22733	andi.w		&0x30,%d1		# extract rnd mode
22734	lsr.w		&0x4,%d1		# mode now in lo(d1)
22735	bsr.l		_round			# round the denorm
22736
22737	mov.l		(%sp)+, %a0		# pop operand ptr
22738
22739# result is now rounded properly. convert back to normal format
22740	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
22741	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
22742	beq.b		unf_res_chkifzero	# no; result is positive
22743	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
22744	clr.b		FTEMP_SGN(%a0)		# clear temp sign
22745
22746# the number may have become zero after rounding. set ccodes accordingly.
22747unf_res_chkifzero:
22748	clr.l		%d0			# default: no ccode bits
22749	tst.l		FTEMP_HI(%a0)		# is value now a zero?
22750	bne.b		unf_res_cont		# no
22751	tst.l		FTEMP_LO(%a0)
22752	bne.b		unf_res_cont		# no
22753#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
22754	bset		&z_bit, %d0		# yes; set zero ccode bit
22755
22756unf_res_cont:
22757
22758#
22759# can inex1 also be set along with unfl and inex2???
22760#
22761# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22762#
22763	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22764	beq.b		unf_res_end		# no
22765	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22766
22767unf_res_end:
22768	add.l		&0x4, %sp		# discard saved %d1
22769	rts
22770
22771# unf_res() for fsglmul() and fsgldiv().
# Same sequence as unf_res, except that _denorm is called with %d0 = 0
# (extended precision) and _round is called with the precision forced to
# s_mode; only the rounding mode is taken from the caller's %d1.
# Stack while _denorm/_round run: (%sp) = saved %a0, 0x4(%sp) = saved %d1.
# Returns %d0 = 0, or with z_bit set when the rounded mantissa is zero.
22772	global		unf_res4
22773unf_res4:
22774	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
22775
22776	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
22777	sne		FTEMP_SGN(%a0)		# sign byte = 0xff if negative, else 0
22778
22779	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
22780	and.w		&0x7fff,%d1		# strip off sign
22781	sub.w		%d0,%d1			# subtract scale factor
22782	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
22783
22784	mov.l		%a0,-(%sp)		# save operand ptr during calls
22785
22786	clr.l		%d0			# force rnd prec = ext
22787	bsr.l		_denorm			# denorm result
22788
22789	mov.l		(%sp),%a0		# reload operand ptr
22790	mov.w		&s_mode,%d1		# force rnd prec = sgl
22791	swap		%d1			# precision now in hi(d1)
22792	mov.w		0x6(%sp),%d1		# load rnd mode
22793	andi.w		&0x30,%d1		# extract rnd mode
22794	lsr.w		&0x4,%d1		# mode now in lo(d1)
22795	bsr.l		_round			# round the denorm
22796
22797	mov.l		(%sp)+,%a0		# pop operand ptr
22798
22799# result is now rounded properly. convert back to normal format
22800	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
22801	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
22802	beq.b		unf_res4_chkifzero	# no; result is positive
22803	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
22804	clr.b		FTEMP_SGN(%a0)		# clear temp sign
22805
22806# the number may have become zero after rounding. set ccodes accordingly.
22807unf_res4_chkifzero:
22808	clr.l		%d0			# default: no ccode bits
22809	tst.l		FTEMP_HI(%a0)		# is value now a zero?
22810	bne.b		unf_res4_cont		# no
22811	tst.l		FTEMP_LO(%a0)
22812	bne.b		unf_res4_cont		# no
22813#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
22814	bset		&z_bit,%d0		# yes; set zero ccode bit
22815
22816unf_res4_cont:
22817
22818#
22819# can inex1 also be set along with unfl and inex2???
22820#
22821# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22822#
22823	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22824	beq.b		unf_res4_end		# no
22825	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22826
22827unf_res4_end:
22828	add.l		&0x4,%sp		# discard saved %d1
22829	rts
22830
22831#########################################################################
22832# XDEF ****************************************************************	#
22833#	ovf_res(): routine to produce the default overflow result of	#
22834#		   an overflowing number.				#
22835#	ovf_res2(): same as above but the rnd mode/prec are passed	#
22836#		    differently.					#
22837#									#
22838# XREF ****************************************************************	#
22839#	none								#
22840#									#
22841# INPUT ***************************************************************	#
22842#	d1.b 	= '-1' => (-); '0' => (+)				#
22843#   ovf_res():								#
22844#	d0 	= rnd mode/prec						#
22845#   ovf_res2():								#
22846#	hi(d0) 	= rnd prec						#
22847#	lo(d0)	= rnd mode						#
22848#									#
22849# OUTPUT **************************************************************	#
22850#	a0   	= points to extended precision result			#
22851#	d0.b 	= condition code bits					#
22852#									#
22853# ALGORITHM ***********************************************************	#
22854#	The default overflow result can be determined by the sign of	#
22855# the result and the rounding mode/prec in effect. These bits are	#
22856# concatenated together to create an index into the default result 	#
22857# table. A pointer to the correct result is returned in a0. The		#
22858# resulting condition codes are returned in d0 in case the caller 	#
22859# doesn't want FPSR_cc altered (as is the case for fmove out).		#
22860#									#
22861#########################################################################
22862
22863	global		ovf_res
22864ovf_res:
22865	andi.w		&0x10,%d1		# keep only bit 4 of d1.w: the result sign
22866	lsr.b		&0x4,%d0		# move d0 bits 7:4 (rnd prec/mode) to bits 3:0
22867	or.b		%d0,%d1			# d1 = {sign, prec/mode} table index
22868	mov.w		%d1,%d0			# d0 = index into byte-wide tbl_ovfl_cc
22869	lsl.b		&0x1,%d1		# d1 = index*2; the *8 scale below gives 16-byte entries
22870	bra.b		ovf_res_load		# share the table lookup with ovf_res2
22871
22872	global		ovf_res2
22873ovf_res2:
22874	and.w		&0x10, %d1		# keep only bit 4 of d1.w: the result sign
22875	or.b		%d0, %d1		# insert rnd mode from lo(d0)
22876	swap		%d0			# bring hi(d0) (rnd prec) into the low word
22877	or.b		%d0, %d1		# insert rnd prec
22878	mov.w		%d1, %d0		# d0 = index into byte-wide tbl_ovfl_cc
22879	lsl.b		&0x1, %d1		# d1 = index*2; the *8 scale below gives 16-byte entries
22880
22881#
22882# use the rounding mode, precision, and result sign as an index into the
22883# two tables below to fetch the default result and the result ccodes.
22884#
22885ovf_res_load:
22886	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # d0.b = result ccodes for this index
22887	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # a0 = ptr to 16-byte default result
22888
22889	rts
22890
22891tbl_ovfl_cc:					# one ccode byte per {sign,prec,mode} index; 0x2 pairs with INF results, 0x8 with negative ones (cf. tbl_ovfl_result)
22892	byte		0x2, 0x0, 0x0, 0x2	# (+) EXT: RN,RZ,RM,RP
22893	byte		0x2, 0x0, 0x0, 0x2	# (+) SGL: RN,RZ,RM,RP
22894	byte		0x2, 0x0, 0x0, 0x2	# (+) DBL: RN,RZ,RM,RP
22895	byte		0x0, 0x0, 0x0, 0x0	# filler for prec field = 3; keeps the (-) rows at index 16
22896	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) EXT: RN,RZ,RM,RP
22897	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) SGL: RN,RZ,RM,RP
22898	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) DBL: RN,RZ,RM,RP
22899
22900tbl_ovfl_result:				# 16-byte entries, same {sign,prec,mode} index as tbl_ovfl_cc
22901	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22902	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22903	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22904	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22905
22906	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22907	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22908	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22909	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22910
22911	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22912	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22913	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22914	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22915
22916	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec field = 3
22917	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec field = 3
22918	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec field = 3
22919	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec field = 3
22920
22921	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22922	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22923	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22924	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22925
22926	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22927	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22928	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22929	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22930
22931	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22932	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22933	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22934	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22935
22936#########################################################################
22937# XDEF ****************************************************************	#
22938#	get_packed(): fetch a packed operand from memory and then	#
22939#		      convert it to a floating-point binary number.	#
22940#									#
22941# XREF ****************************************************************	#
22942#	_dcalc_ea() - calculate the correct <ea>			#
22943#	_dmem_read() - fetch the packed operand from memory		#
22944#	facc_in_x() - the fetch failed so jump to special exit code	#
22945#	decbin()    - convert packed to binary extended precision	#
22946#									#
22947# INPUT ***************************************************************	#
22948#	None								#
22949# 									#
22950# OUTPUT **************************************************************	#
22951#	If no failure on _dmem_read():					#
22952# 	FP_SRC(a6) = packed operand now as a binary FP number		#
22953#									#
22954# ALGORITHM ***********************************************************	#
22955#	Get the correct <ea> which is the value on the exception stack 	#
22956# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
22957# Then, fetch the operand from memory. If the fetch fails, exit		#
22958# through facc_in_x().							#
22959#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
22960# its binary representation here. Else, call decbin() which will 	#
22961# convert the packed value to an extended precision binary value.	#
22962#									#
22963#########################################################################
22964
22965# the stacked <ea> for packed is correct except for -(An).
22966# the base reg must be updated for both -(An) and (An)+.
22967	global		get_packed
22968get_packed:
22969	mov.l		&0xc,%d0		# pass: operand size (packed is 12 bytes)
22970	bsr.l		_dcalc_ea		# fetch <ea>; correct An
22971
22972	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
22973	mov.l		&0xc,%d0		# pass: 12 bytes
22974	bsr.l		_dmem_read		# read packed operand into FP_SRC
22975
22976	tst.l		%d1			# did the read fail (d1 != 0)?
22977	bne.l		facc_in_x		# yes; leave through the access-error exit
22978
22979# The packed operand is an INF or a NAN if the exponent field is all ones.
22980	bfextu		FP_SRC(%a6){&1:&15},%d0	# d0 = 15-bit field following the sign bit
22981	cmpi.w		%d0,&0x7fff		# INF or NAN?
22982	bne.b		gp_try_zero		# no
22983	rts					# operand is an INF or NAN; FP_SRC left as read
22984
22985# The packed operand is a zero if the mantissa is all zero, else it's
22986# a normal packed op.
22987gp_try_zero:
22988	mov.b		3+FP_SRC(%a6),%d0	# get 4th byte (offset 3) of the operand
22989	andi.b		&0x0f,%d0		# keep low nybble: the integer digit
22990	bne.b		gp_not_spec		# not a zero
22991	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
22992	bne.b		gp_not_spec		# not a zero
22993	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
22994	bne.b		gp_not_spec		# not a zero
22995	rts					# operand is a ZERO; FP_SRC left as read
22996gp_not_spec:
22997	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
22998	bsr.l		decbin			# convert to extended; result in fp0
22999	fmovm.x		&0x80,FP_SRC(%a6)	# store fp0 over FP_SRC as the new srcop
23000	rts
23001
23002#########################################################################
23003# decbin(): Converts normalized packed bcd value pointed to by register	#
23004#	    a0 to extended-precision value in fp0.			#
23005#									#
23006# INPUT ***************************************************************	#
23007#	a0 = pointer to normalized packed bcd value			#
23008#									#
23009# OUTPUT **************************************************************	#
23010#	fp0 = exact fp representation of the packed bcd value.		#
23011#									#
23012# ALGORITHM ***********************************************************	#
23013#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
23014#	and NaN operands are dispatched without entering this routine)	#
23015#	value in 68881/882 format at location (a0).			#
23016#									#
23017#	A1. Convert the bcd exponent to binary by successive adds and 	#
23018#	muls. Set the sign according to SE. Subtract 16 to compensate	#
23019#	for the mantissa which is to be interpreted as 17 integer	#
23020#	digits, rather than 1 integer and 16 fraction digits.		#
23021#	Note: this operation can never overflow.			#
23022#									#
23023#	A2. Convert the bcd mantissa to binary by successive		#
23024#	adds and muls in FP0. Set the sign according to SM.		#
23025#	The mantissa digits will be converted with the decimal point	#
23026#	assumed following the least-significant digit.			#
23027#	Note: this operation can never overflow.			#
23028#									#
23029#	A3. Count the number of leading/trailing zeros in the		#
23030#	bcd string.  If SE is positive, count the leading zeros;	#
23031#	if negative, count the trailing zeros.  Set the adjusted	#
23032#	exponent equal to the exponent from A1 and the zero count	#
23033#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
23034#	mantissa the equivalent of forcing in the bcd value:		#
23035#									#
23036#	SM = 0	a non-zero digit in the integer position		#
23037#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
23038#									#
23039#	this will insure that any value, regardless of its		#
23040#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
23041#	consistently.							#
23042#									#
23043#	A4. Calculate the factor 10^exp in FP1 using a table of		#
23044#	10^(2^n) values.  To reduce the error in forming factors	#
23045#	greater than 10^27, a directed rounding scheme is used with	#
23046#	tables rounded to RN, RM, and RP, according to the table	#
23047#	in the comments of the pwrten section.				#
23048#									#
23049#	A5. Form the final binary number by scaling the mantissa by	#
23050#	the exponent factor.  This is done by multiplying the		#
23051#	mantissa in FP0 by the factor in FP1 if the adjusted		#
23052#	exponent sign is positive, and dividing FP0 by FP1 if		#
23053#	it is negative.							#
23054#									#
23055#	Clean up and return. Check if the final mul or div was inexact.	#
23056#	If so, set INEX1 in USER_FPSR.					#
23057#									#
23058#########################################################################
23059
23060#
23061#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23062#	to nearest, minus, and plus, respectively.  The tables include
23063#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
23064#	is required until the power is greater than 27, however, all
23065#	tables include the first 5 for ease of indexing.
23066#
23067RTABLE:					# indexed by {FPCR rnd mode,SM,SE} in pwrten; 0 selects PTENRN, 2 PTENRM, 3 PTENRP
23068	byte		0,0,0,0			# user mode RN
23069	byte		2,3,2,3			# user mode RZ
23070	byte		2,3,3,2			# user mode RM
23071	byte		3,2,2,3			# user mode RP
23072
23073	set		FNIBS,7			# dbf count: 8 digits per mantissa long word
23074	set		FSTRT,0			# bit offset of first digit in a mantissa long word
23075
23076	set		ESTRT,4			# bit offset of first exponent digit in long word 1
23077	set		EDIGITS,2		# dbf count: 3 exponent digits
23078
23079	global		decbin
23080decbin:
23081	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # copy the packed input into FP_SCR0
23082	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so the caller's operand is not altered
23083	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
23084
23085	lea		FP_SCR0(%a6),%a0		# a0 = ptr to the working copy
23086
23087	movm.l		&0x3c00,-(%sp)		# save d2-d5
23088	fmovm.x		&0x2,-(%sp)		# save fp1: -(An) mask bit 1 = fp1 (0x1 would push fp0); pairs with the (%sp)+,&0x40 restore at exit
23089#
23090# Calculate exponent:
23091#  1. Copy bcd value in memory for use as a working copy.
23092#  2. Calculate absolute value of exponent in d1 by mul and add.
23093#  3. Correct for exponent sign.
23094#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23095#     (i.e., all digits assumed left of the decimal point.)
23096#
23097# Register usage:
23098#
23099#  calc_e:
23100#	(*)  d0: temp digit storage
23101#	(*)  d1: accumulator for binary exponent
23102#	(*)  d2: digit count
23103#	(*)  d3: offset pointer
23104#	( )  d4: first word of bcd
23105#	( )  a0: pointer to working bcd value
23106#	( )  a6: pointer to original bcd value
23107#	(*)  FP_SCR1: working copy of original bcd value
23108#	(*)  L_SCR1: copy of original exponent word
23109#
23110calc_e:
23111	mov.l		&EDIGITS,%d2		# dbf counter: loop runs EDIGITS+1 times
23112	mov.l		&ESTRT,%d3		# bit offset of the first exponent digit
23113	mov.l		(%a0),%d4		# get first long word of bcd
23114	clr.l		%d1			# zero d1 for accumulator
23115e_gd:
23116	mulu.l		&0xa,%d1		# mul partial sum by 10 (one digit place)
23117	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
23118	add.l		%d0,%d1			# d1 = d1 + d0
23119	addq.b		&4,%d3			# advance d3 to the next digit
23120	dbf.w		%d2,e_gd		# loop until all 3 exponent digits are used
23121	btst		&30,%d4			# test SE (sign of exponent)
23122	beq.b		e_pos			# don't negate if pos
23123	neg.l		%d1			# negate before subtracting
23124e_pos:
23125	sub.l		&16,%d1			# sub to compensate for shift of mant
23126	bge.b		e_save			# if still pos, do not neg
23127	neg.l		%d1			# now negative, make pos and set SE
23128	or.l		&0x40000000,%d4		# set SE in d4,
23129	or.l		&0x40000000,(%a0)	# and in working bcd
23130e_save:
23131	mov.l		%d1,-(%sp)		# push abs(adjusted exp); later phases read it via (%sp)
23132#
23133#
23134# Calculate mantissa:
23135#  1. Calculate absolute value of mantissa in fp0 by mul and add.
23136#  2. Correct for mantissa sign.
23137#     (i.e., all digits assumed left of the decimal point.)
23138#
23139# Register usage:
23140#
23141#  calc_m:
23142#	(*)  d0: temp digit storage
23143#	(*)  d1: lword counter
23144#	(*)  d2: digit count
23145#	(*)  d3: offset pointer
23146#	( )  d4: words 2 and 3 of bcd
23147#	( )  a0: pointer to working bcd value
23148#	( )  a6: pointer to original bcd value
23149#	(*) fp0: mantissa accumulator
23150#	( )  FP_SCR1: working copy of original bcd value
23151#	( )  L_SCR1: copy of original exponent word
23152#
23153calc_m:
23154	mov.l		&1,%d1			# long word index, init to 1 (second long word)
23155	fmov.s		&0x00000000,%fp0	# accumulator = 0.0
23156#
23157#
23158#  The integer digit sits in the low nibble of the first long word, apart
23159#  from the remaining digits in long words 2 and 3, so fetch it first and
23160#  then step down to the rest of the mantissa (loop unrolled once).
23161#
23162	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
23163	fadd.b		%d0,%fp0		# add digit to sum in fp0
23164#
23165#
23166#  Get the rest of the mantissa.
23167#
23168loadlw:
23169	mov.l		(%a0,%d1.L*4),%d4	# load mantissa long word into d4
23170	mov.l		&FSTRT,%d3		# bit offset of the first digit
23171	mov.l		&FNIBS,%d2		# reset dbf digit counter for this long word
23172md2b:
23173	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
23174	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
23175	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
23176#
23177#
23178#  If all the digits (8) in that long word have been converted (dbf on d2
23179#  falls through), then inc d1 (=2) to point to the next long word; loadlw
23180#  resets d3 to 0 for the digit offset and d2 to 7 for the digit count;
23181#  else continue with this long word.
23182#
23183	addq.b		&4,%d3			# advance d3 to the next digit
23184	dbf.w		%d2,md2b		# check for last digit in this lw
23185nextlw:
23186	addq.l		&1,%d1			# inc lw index in mantissa
23187	cmp.l		%d1,&2			# test for last lw
23188	ble.b		loadlw			# index <= 2: convert the next long word
23189#
23190#  Check the sign of the mant and make the value in fp0 the same sign.
23191#
23192m_sign:
23193	btst		&31,(%a0)		# test SM (sign of the mantissa)
23194	beq.b		ap_st_z			# if clear, go to append/strip zeros
23195	fneg.x		%fp0			# if set, negate fp0
23196#
23197# Append/strip zeros:
23198#
23199#  For adjusted exponents which have an absolute value greater than 27*,
23200#  this routine calculates the amount needed to normalize the mantissa
23201#  for the adjusted exponent.  That number is subtracted from the exp
23202#  if the exp was positive, and added if it was negative.  The purpose
23203#  of this is to reduce the value of the exponent and the possibility
23204#  of error in calculation of pwrten.
23205#
23206#  1. Branch on the sign of the adjusted exponent.
23207#  2p.(positive exp)
23208#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
23209#   3. Add one for each zero encountered until a non-zero digit.
23210#   4. Subtract the count from the exp.
23211#   5. Check if the exp has crossed zero in #3 above; make the exp abs
23212#	   and set SE.
23213#	6. Multiply the mantissa by 10**count.
23214#  2n.(negative exp)
23215#   2. Check the digits in lwords 3 and 2 in descending order.
23216#   3. Add one for each zero encountered until a non-zero digit.
23217#   4. Add the count to the exp.
23218#   5. Check if the exp has crossed zero in #3 above; clear SE.
23219#   6. Divide the mantissa by 10**count.
23220#
23221#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
23222#   any adjustment due to append/strip zeros will drive the resultant
23223#   exponent towards zero.  Since all pwrten constants with a power
23224#   of 27 or less are exact, there is no need to use this routine to
23225#   attempt to lessen the resultant exponent.
23226#
23227# Register usage:
23228#
23229#  ap_st_z:
23230#	(*)  d0: temp digit storage
23231#	(*)  d1: zero count
23232#	(*)  d2: digit count
23233#	(*)  d3: offset pointer
23234#	( )  d4: first word of bcd
23235#	(*)  d5: lword counter
23236#	( )  a0: pointer to working bcd value
23237#	( )  FP_SCR1: working copy of original bcd value
23238#	( )  L_SCR1: copy of original exponent word
23239#
23240#
23241# First check the absolute value of the exponent to see if this
23242# routine is necessary.  If so, then check the sign of the exponent
23243# and do append (+) or strip (-) zeros accordingly.
23244# This section handles a positive adjusted exponent.
23245#
23246ap_st_z:
23247	mov.l		(%sp),%d1		# load abs(expA) for range test
23248	cmp.l		%d1,&27			# test is with 27
23249	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
23250	btst		&30,(%a0)		# check sign of exp
23251	bne.b		ap_st_n			# if neg, go to neg side
23252	clr.l		%d1			# zero count reg
23253	mov.l		(%a0),%d4		# load lword 1 to d4
23254	bfextu		%d4{&28:&4},%d0		# get M16 in d0
23255	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
23256	addq.l		&1,%d1			# M16 is zero: count it
23257	mov.l		&1,%d5			# init lword counter
23258	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
23259	bne.b		ap_p_cl			# lw 2 is non-zero: go scan its digits
23260	addq.l		&8,%d1			# lw 2 is all zeros: count its 8 digits
23261	addq.l		&1,%d5			# inc lword counter
23262	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
23263ap_p_cl:
23264	clr.l		%d3			# init bit offset to the first digit
23265	mov.l		&7,%d2			# init dbf digit counter (8 digits)
23266ap_p_gd:
23267	bfextu		%d4{%d3:&4},%d0		# get digit
23268	bne.b		ap_p_fx			# if non-zero, go to fix exp
23269	addq.l		&4,%d3			# point to next digit
23270	addq.l		&1,%d1			# inc zero count
23271	dbf.w		%d2,ap_p_gd		# get next digit
23272ap_p_fx:
23273	mov.l		%d1,%d0			# copy zero count to d0
23274	mov.l		(%sp),%d1		# get adjusted exp from memory
23275	sub.l		%d0,%d1			# subtract count from exp
23276	bge.b		ap_p_fm			# if still pos, go scale the mantissa
23277	neg.l		%d1			# now its neg; get abs
23278	mov.l		(%a0),%d4		# load lword 1 to d4
23279	or.l		&0x40000000,%d4		# and set SE in d4
23280	or.l		&0x40000000,(%a0)	# and in memory
23281#
23282# Calculate the mantissa multiplier, 10**(zero count in d0), to compensate
23283# for the stripping of zeros from the mantissa.
23284#
23285ap_p_fm:
23286	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
23287	clr.l		%d3			# init table byte offset
23288	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23289	mov.l		&3,%d2			# d2 = 3; not read by the loop below, which ends when d0 == 0
23290ap_p_el:
23291	asr.l		&1,%d0			# shift lsb into carry
23292	bcc.b		ap_p_en			# bit clear: skip this power of ten
23293	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**bit_no)
23294ap_p_en:
23295	add.l		&12,%d3			# advance to the next 12-byte table entry
23296	tst.l		%d0			# check if d0 is zero
23297	bne.b		ap_p_el			# if not, get next bit
23298	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
23299	bra.b		pwrten			# go calc pwrten
23300#
23301# This section handles a negative adjusted exponent.
23302#
23303ap_st_n:
23304	clr.l		%d1			# clr zero count
23305	mov.l		&2,%d5			# set up d5 to point to lword 3
23306	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
23307	bne.b		ap_n_cl			# if not zero, check digits
23308	sub.l		&1,%d5			# dec d5 to point to lword 2
23309	addq.l		&8,%d1			# lw 3 is all zeros: count its 8 digits
23310	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
23311ap_n_cl:
23312	mov.l		&28,%d3			# bit offset of the last digit in the lword
23313	mov.l		&7,%d2			# init dbf digit counter (8 digits)
23314ap_n_gd:
23315	bfextu		%d4{%d3:&4},%d0		# get digit
23316	bne.b		ap_n_fx			# if non-zero, go to exp fix
23317	subq.l		&4,%d3			# point to previous digit
23318	addq.l		&1,%d1			# inc zero count
23319	dbf.w		%d2,ap_n_gd		# get next digit
23320ap_n_fx:
23321	mov.l		%d1,%d0			# copy zero count to d0
23322	mov.l		(%sp),%d1		# get adjusted exp from memory
23323	sub.l		%d0,%d1			# subtract count from abs(exp)
23324	bgt.b		ap_n_fm			# if still > 0, go fix mantissa
23325	neg.l		%d1			# take abs of exp and clr SE
23326	mov.l		(%a0),%d4		# load lword 1 to d4
23327	and.l		&0xbfffffff,%d4		# and clr SE in d4
23328	and.l		&0xbfffffff,(%a0)	# and in memory
23329#
23330# Calculate the mantissa divisor, 10**(zero count in d0), to compensate
23331# for the exponent adjustment made above.
23332#
23333ap_n_fm:
23334	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
23335	clr.l		%d3			# init table byte offset
23336	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23337	mov.l		&3,%d2			# d2 = 3; not read by the loop below, which ends when d0 == 0
23338ap_n_el:
23339	asr.l		&1,%d0			# shift lsb into carry
23340	bcc.b		ap_n_en			# bit clear: skip this power of ten
23341	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**bit_no)
23342ap_n_en:
23343	add.l		&12,%d3			# advance to the next 12-byte table entry
23344	tst.l		%d0			# check if d0 is zero
23345	bne.b		ap_n_el			# if not, get next bit
23346	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count)
23347#
23348#
23349# Calculate power-of-ten factor from adjusted and shifted exponent.
23350#
23351# Register usage:
23352#
23353#  pwrten:
23354#	(*)  d0: temp
23355#	( )  d1: exponent
23356#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23357#	(*)  d3: FPCR work copy
23358#	( )  d4: first word of bcd
23359#	(*)  a1: RTABLE pointer
23360#  calc_p:
23361#	(*)  d0: temp
23362#	( )  d1: exponent
23363#	(*)  d3: PWRTxx table index
23364#	( )  a0: pointer to working copy of bcd
23365#	(*)  a1: PWRTxx pointer
23366#	(*) fp1: power-of-ten accumulator
23367#
23368# Pwrten calculates the exponent factor in the selected rounding mode
23369# according to the following table:
23370#
23371#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
23372#
23373#	ANY	  ANY	RN	RN
23374#
23375#	 +	   +	RP	RP
23376#	 -	   +	RP	RM
23377#	 +	   -	RP	RM
23378#	 -	   -	RP	RP
23379#
23380#	 +	   +	RM	RM
23381#	 -	   +	RM	RP
23382#	 +	   -	RM	RP
23383#	 -	   -	RM	RM
23384#
23385#	 +	   +	RZ	RM
23386#	 -	   +	RZ	RM
23387#	 +	   -	RZ	RP
23388#	 -	   -	RZ	RP
23389#
23390#
23391pwrten:
23392	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
23393	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
23394	mov.l		(%a0),%d4		# reload 1st bcd long word to d4
23395	asl.l		&2,%d2			# format d2 to be
23396	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
23397	add.l		%d0,%d2			# in d2 as index into RTABLE
23398	lea.l		RTABLE(%pc),%a1		# load rtable base
23399	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
23400	clr.l		%d3			# clear d3 to force no exc and extended
23401	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR image
23402	fmov.l		%d3,%fpcr		# write new FPCR
23403	asr.l		&1,%d0			# bit 0 of the table value set?
23404	bcc.b		not_rp			# no; value is not 3 (RP)
23405	lea.l		PTENRP(%pc),%a1		# it is RP
23406	bra.b		calc_p			# go to init section
23407not_rp:
23408	asr.l		&1,%d0			# bit 1 of the table value set?
23409	bcc.b		not_rm			# no; value is 0 (RN)
23410	lea.l		PTENRM(%pc),%a1		# it is RM
23411	bra.b		calc_p			# go to init section
23412not_rm:
23413	lea.l		PTENRN(%pc),%a1		# it is RN
23414calc_p:
23415	mov.l		%d1,%d0			# copy exp to d0;use d0
23416	bpl.b		no_neg			# if exp is negative,
23417	neg.l		%d0			# make it positive
23418	or.l		&0x40000000,(%a0)	# and set SE bit
23419no_neg:
23420	clr.l		%d3			# table byte offset
23421	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
23422e_loop:
23423	asr.l		&1,%d0			# shift next bit into carry
23424	bcc.b		e_next			# if zero, skip the mul
23425	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**bit_no)
23426e_next:
23427	add.l		&12,%d3			# advance to the next 12-byte table entry
23428	tst.l		%d0			# check if d0 is zero
23429	bne.b		e_loop			# not zero, continue shifting
23430#
23431#
23432#  Check the sign of the adjusted exp and make the value in fp0 the
23433#  same sign. If the exp was pos then multiply fp1*fp0;
23434#  else divide fp0/fp1.
23435#
23436# Register Usage:
23437#  norm:
23438#	( )  a0: pointer to working bcd value
23439#	(*) fp0: mantissa accumulator
23440#	( ) fp1: scaling factor - 10**(abs(exp))
23441#
23442pnorm:
23443	btst		&30,(%a0)		# test SE (sign of the exponent)
23444	beq.b		mul			# if clear, go to multiply
23445div:
23446	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**abs(exp)
23447	bra.b		end_dec
23448mul:
23449	fmul.x		%fp1,%fp0		# exp is positive, so multiply mant by 10**exp
23450#
23451#
23452# Clean up and return with result in fp0.
23453#
23454# If the final mul/div in decbin incurred an inex exception,
23455# it will be inex2, but will be reported as inex1 by get_op.
23456#
23457end_dec:
23458	fmov.l		%fpsr,%d0		# get status register
23459	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
23460	beq.b		no_exc			# skip this if no exc
23461	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23462no_exc:
23463	add.l		&0x4,%sp		# drop the exponent long word pushed at e_save
23464	fmovm.x		(%sp)+,&0x40		# restore fp1 ((An)+ mask bit 6 = fp1)
23465	movm.l		(%sp)+,&0x3c		# restore d2-d5
23466	fmov.l		&0x0,%fpcr		# clear FPCR (it was rewritten in pwrten)
23467	fmov.l		&0x0,%fpsr		# clear FPSR
23468	rts
23469
23470#########################################################################
23471# bindec(): Converts an input in extended precision format to bcd format#
23472#									#
23473# INPUT ***************************************************************	#
23474#	a0 = pointer to the input extended precision value in memory.	#
23475#	     the input may be either normalized, unnormalized, or 	#
23476#	     denormalized.						#
23477#	d0 = contains the k-factor sign-extended to 32-bits. 		#
23478#									#
23479# OUTPUT **************************************************************	#
23480#	FP_SCR0(a6) = bcd format result on the stack.			#
23481#									#
23482# ALGORITHM ***********************************************************	#
23483#									#
23484#	A1.	Set RM and size ext;  Set SIGMA = sign of input.  	#
23485#		The k-factor is saved for use in d7. Clear the		#
23486#		BINDEC_FLG for separating normalized/denormalized	#
23487#		input.  If input is unnormalized or denormalized,	#
23488#		normalize it.						#
23489#									#
23490#	A2.	Set X = abs(input).					#
23491#									#
23492#	A3.	Compute ILOG.						#
23493#		ILOG is the log base 10 of the input value.  It is	#
23494#		approximated by adding e + 0.f when the original 	#
23495#		value is viewed as 2^^e * 1.f in extended precision.  	#
23496#		This value is stored in d6.				#
23497#									#
23498#	A4.	Clr INEX bit.						#
23499#		The operation in A3 above may have set INEX2.  		#
23500#									#
23501#	A5.	Set ICTR = 0;						#
23502#		ICTR is a flag used in A13.  It must be set before the 	#
23503#		loop entry A6.						#
23504#									#
23505#	A6.	Calculate LEN.						#
23506#		LEN is the number of digits to be displayed.  The	#
23507#		k-factor can dictate either the total number of digits,	#
23508#		if it is a positive number, or the number of digits	#
23509#		after the decimal point which are to be included as	#
23510#		significant.  See the 68882 manual for examples.	#
23511#		If LEN is computed to be greater than 17, set OPERR in	#
23512#		USER_FPSR.  LEN is stored in d4.			#
23513#									#
23514#	A7.	Calculate SCALE.					#
23515#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
23516#		of decimal places needed to insure LEN integer digits	#
23517#		in the output before conversion to bcd. LAMBDA is the	#
23518#		sign of ISCALE, used in A9. Fp1 contains		#
23519#		10^^(abs(ISCALE)) using a rounding mode which is a	#
23520#		function of the original rounding mode and the signs	#
23521#		of ISCALE and X.  A table is given in the code.		#
23522#									#
23523#	A8.	Clr INEX; Force RZ.					#
23524#		The operation in A3 above may have set INEX2.  		#
23525#		RZ mode is forced for the scaling operation to insure	#
23526#		only one rounding error.  The grs bits are collected in #
23527#		the INEX flag for use in A10.				#
23528#									#
23529#	A9.	Scale X -> Y.						#
23530#		The mantissa is scaled to the desired number of		#
23531#		significant digits.  The excess digits are collected	#
23532#		in INEX2.						#
23533#									#
23534#	A10.	Or in INEX.						#
23535#		If INEX is set, round error occurred.  This is		#
23536#		compensated for by 'or-ing' in the INEX2 flag to	#
23537#		the lsb of Y.						#
23538#									#
23539#	A11.	Restore original FPCR; set size ext.			#
23540#		Perform FINT operation in the user's rounding mode.	#
23541#		Keep the size to extended.				#
23542#									#
23543#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
23544#		mode.  The FPSP routine sintd0 is used.  The output	#
23545#		is in fp0.						#
23546#									#
23547#	A13.	Check for LEN digits.					#
23548#		If the int operation results in more than LEN digits,	#
23549#		or less than LEN -1 digits, adjust ILOG and repeat from	#
23550#		A6.  This test occurs only on the first pass.  If the	#
23551#		result is exactly 10^LEN, decrement ILOG and divide	#
23552#		the mantissa by 10.					#
23553#									#
23554#	A14.	Convert the mantissa to bcd.				#
23555#		The binstr routine is used to convert the LEN digit 	#
23556#		mantissa to bcd in memory.  The input to binstr is	#
23557#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
23558#		such that the decimal point is to the left of bit 63.	#
23559#		The bcd digits are stored in the correct position in 	#
23560#		the final string area in memory.			#
23561#									#
23562#	A15.	Convert the exponent to bcd.				#
23563#		As in A14 above, the exp is converted to bcd and the	#
23564#		digits are stored in the final string.			#
23565#		Test the length of the final exponent string.  If the	#
23566#		length is 4, set operr.					#
23567#									#
23568#	A16.	Write sign bits to final string.			#
23569#									#
23570#########################################################################
23571
23572set	BINDEC_FLG,	EXC_TEMP	# DENORM flag byte; cleared/set by bindec
23573
23574# Constants in extended precision
23575PLOG2:
23576	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 # ~0.30103 = log10(2)
23577PLOG2UP1:
23578	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 # PLOG2 with the low mantissa bit incremented
23579
23580# Constants in single precision (value in the first long word of each entry)
23581FONE:
23582	long		0x3F800000,0x00000000,0x00000000,0x00000000 # 1.0
23583FTWO:
23584	long		0x40000000,0x00000000,0x00000000,0x00000000 # 2.0
23585FTEN:
23586	long		0x41200000,0x00000000,0x00000000,0x00000000 # 10.0
23587F4933:
23588	long		0x459A2800,0x00000000,0x00000000,0x00000000 # 4933.0
23589
23590RBDTBL:					# NOTE(review): presumably the rounding-mode table mentioned in step A7 of the bindec header; its use is outside this chunk - confirm
23591	byte		0,0,0,0
23592	byte		3,3,2,2
23593	byte		3,2,2,3
23594	byte		2,3,3,2
23595
23596#	Implementation Notes:
23597#
23598#	The registers are used as follows:
23599#
23600#		d0: scratch; LEN input to binstr
23601#		d1: scratch
23602#		d2: upper 32-bits of mantissa for binstr
23603#		d3: scratch;lower 32-bits of mantissa for binstr
23604#		d4: LEN
23605#      		d5: LAMBDA/ICTR
23606#		d6: ILOG
23607#		d7: k-factor
23608#		a0: ptr for original operand/final result
23609#		a1: scratch pointer
23610#		a2: pointer to FP_X; abs(original value) in ext
23611#		fp0: scratch
23612#		fp1: scratch
23613#		fp2: scratch
23614#		F_SCR1:
23615#		F_SCR2:
23616#		L_SCR1:
23617#		L_SCR2:
23618
# bindec(): convert the extended-precision operand at (a0) to packed decimal.
# On entry d0 holds the k-factor and STAG(a6) the operand tag.  d2-d7/a2 and
# fp0-fp2 are saved here and restored just before the final rts.
23619	global		bindec
23620bindec:
23621	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
23622	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
23623
23624# A1. Set RM and size ext. Set SIGMA = sign input;
23625#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
23626#     separating  normalized/denormalized input.  If the input
23627#     is a denormalized number, set the BINDEC_FLG memory word
23628#     to signal denorm.  If the input is unnormalized, normalize
23629#     the input and test for denormalized result.
23630#
23631	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
23632	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
23633	mov.l		%d0,%d7		# move k-factor to d7
23634
23635	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
23636	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
23637	bne.w		A2_str		# no; input is a NORM
23638
23639#
23640# Normalize the denorm
23641#
23642un_de_norm:
23643	mov.w		(%a0),%d0
23644	and.w		&0x7fff,%d0	# strip sign bit from the input exponent
23645	mov.l		4(%a0),%d1
23646	mov.l		8(%a0),%d2
# Shift the 64-bit mantissa d1:d2 left one bit per pass, decrementing the
# exponent each time, until the msb of d1 is set.  The loop body always runs
# at least once.
23647norm_loop:
23648	sub.w		&1,%d0
23649	lsl.l		&1,%d2
23650	roxl.l		&1,%d1
23651	tst.l		%d1
23652	bge.b		norm_loop
23653#
23654# Test if the normalized input is denormalized
23655#
23656	tst.w		%d0
23657	bgt.b		pos_exp		# if greater than zero, it is a norm
23658	st		BINDEC_FLG(%a6)	# set flag for denorm
# Write the normalized operand back over the caller's copy.  The sign bit is
# dropped here; the original first long was saved in L_SCR2 above.
23659pos_exp:
23660	and.w		&0x7fff,%d0	# strip sign of normalized exp
23661	mov.w		%d0,(%a0)
23662	mov.l		%d1,4(%a0)
23663	mov.l		%d2,8(%a0)
23664
23665# A2. Set X = abs(input).
23666#
# A2_str: copy the 12-byte operand at (a0) into FP_SCR1(a6) and clear bit 31
# of its first long so the work copy holds abs(X).
23667A2_str:
23668	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
23669	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
23670	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
23671	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
23672
23673# A3. Compute ILOG.
23674#     ILOG is the log base 10 of the input value.  It is approx-
23675#     imated by adding e + 0.f when the original value is viewed
23676#     as 2^^e * 1.f in extended precision.  This value is stored
23677#     in d6.
23678#
23679# Register usage:
23680#	Input/Output
23681#	d0: k-factor/exponent
23682#	d2: x/x
23683#	d3: x/x
23684#	d4: x/x
23685#	d5: x/x
23686#	d6: x/ILOG
23687#	d7: k-factor/Unchanged
23688#	a0: ptr for original operand/final result
23689#	a1: x/x
23690#	a2: x/x
23691#	fp0: x/float(ILOG)
23692#	fp1: x/x
23693#	fp2: x/x
23694#	F_SCR1:x/x
23695#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23696#	L_SCR1:x/x
23697#	L_SCR2:first word of X packed/Unchanged
23698
# A3: estimate ILOG into d6.  A flagged denorm gets the fixed value -4933.
# Otherwise the work copy's exponent is replaced by 0x3fff (giving 1.f), the
# unbiased exponent is added, 1.0 is subtracted, and the result is scaled by
# PLOG2 (non-negative) or PLOG2UP1 (negative) before conversion to a long.
23699	tst.b		BINDEC_FLG(%a6)	# check for denorm
23700	beq.b		A3_cont		# if clr, continue with norm
23701	mov.l		&-4933,%d6	# force ILOG = -4933
23702	bra.b		A4_str
23703A3_cont:
23704	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
23705	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
23706	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
23707	sub.w		&0x3fff,%d0	# strip off bias
23708	fadd.w		%d0,%fp0	# add in exp
23709	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
23710	fbge.w		pos_res		# if pos, branch
23711	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
23712	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23713	bra.b		A4_str		# go move out ILOG
23714pos_res:
23715	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
23716	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
23717
23718
23719# A4. Clr INEX bit.
23720#     The operation in A3 above may have set INEX2.
23721
# A4_str: wipe the whole FPSR (A3 may have raised INEX2), then fall into A5,
# which zeroes ICTR in the low word of d5 before A6 is entered.
23722A4_str:
23723	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
23724
23725
23726# A5. Set ICTR = 0;
23727#     ICTR is a flag used in A13.  It must be set before the
23728#     loop entry A6. The lower word of d5 is used for ICTR.
23729
23730	clr.w		%d5		# clear ICTR
23731
23732# A6. Calculate LEN.
23733#     LEN is the number of digits to be displayed.  The k-factor
23734#     can dictate either the total number of digits, if it is
23735#     a positive number, or the number of digits after the
23736#     original decimal point which are to be included as
23737#     significant.  See the 68882 manual for examples.
23738#     If LEN is computed to be greater than 17, set OPERR in
23739#     USER_FPSR.  LEN is stored in d4.
23740#
23741# Register usage:
23742#	Input/Output
23743#	d0: exponent/Unchanged
23744#	d2: x/x/scratch
23745#	d3: x/x
23746#	d4: exc picture/LEN
23747#	d5: ICTR/Unchanged
23748#	d6: ILOG/Unchanged
23749#	d7: k-factor/Unchanged
23750#	a0: ptr for original operand/final result
23751#	a1: x/x
23752#	a2: x/x
23753#	fp0: float(ILOG)/Unchanged
23754#	fp1: x/x
23755#	fp2: x/x
23756#	F_SCR1:x/x
23757#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23758#	L_SCR1:x/x
23759#	L_SCR2:first word of X packed/Unchanged
23760
# A6_str: derive LEN (digit count) in d4 from the k-factor in d7 and ILOG in
# d6, clamped to the range 1..17.  This label is also the re-entry point
# used by A13 after it adjusts ILOG.
23761A6_str:
23762	tst.l		%d7		# branch on sign of k
23763	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
23764	mov.l		%d7,%d4		# if k > 0, LEN = k
23765	bra.b		len_ck		# skip to LEN check
23766k_neg:
23767	mov.l		%d6,%d4		# first load ILOG to d4
23768	sub.l		%d7,%d4		# subtract off k
23769	addq.l		&1,%d4		# add in the 1
23770len_ck:
23771	tst.l		%d4		# LEN check: branch on sign of LEN
23772	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
23773	cmp.l		%d4,&17		# test if LEN > 17
23774	ble.b		A7_str		# if not, forget it
23775	mov.l		&17,%d4		# set max LEN = 17
23776	tst.l		%d7		# if k <= 0, never set OPERR
23777	ble.b		A7_str		# only k > 0 falls through to set it
23778	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
23779	bra.b		A7_str		# finished here
23780LEN_ng:
23781	mov.l		&1,%d4		# min LEN is 1
23782
23783
23784# A7. Calculate SCALE.
23785#     SCALE is equal to 10^ISCALE, where ISCALE is the number
23786#     of decimal places needed to insure LEN integer digits
23787#     in the output before conversion to bcd. LAMBDA is the sign
23788#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
23789#     the rounding mode as given in the following table (see
23790#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
23791#     of opposite sign in bindec.sa from Coonen).
23792#
23793#	Initial					USE
23794#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
23795#	----------------------------------------------
23796#	 RN	00	   0	   0		00/0	RN
23797#	 RN	00	   0	   1		00/0	RN
23798#	 RN	00	   1	   0		00/0	RN
23799#	 RN	00	   1	   1		00/0	RN
23800#	 RZ	01	   0	   0		11/3	RP
23801#	 RZ	01	   0	   1		11/3	RP
23802#	 RZ	01	   1	   0		10/2	RM
23803#	 RZ	01	   1	   1		10/2	RM
23804#	 RM	10	   0	   0		11/3	RP
23805#	 RM	10	   0	   1		10/2	RM
23806#	 RM	10	   1	   0		10/2	RM
23807#	 RM	10	   1	   1		11/3	RP
23808#	 RP	11	   0	   0		10/2	RM
23809#	 RP	11	   0	   1		11/3	RP
23810#	 RP	11	   1	   0		11/3	RP
23811#	 RP	11	   1	   1		10/2	RM
23812#
23813# Register usage:
23814#	Input/Output
23815#	d0: exponent/scratch - final is 0
23816#	d2: x/0 or 24 for A9
23817#	d3: x/scratch - offset ptr into PTENRM array
23818#	d4: LEN/Unchanged
23819#	d5: 0/ICTR:LAMBDA
23820#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23821#	d7: k-factor/Unchanged
23822#	a0: ptr for original operand/final result
23823#	a1: x/ptr to PTENRM array
23824#	a2: x/x
23825#	fp0: float(ILOG)/Unchanged
23826#	fp1: x/10^ISCALE
23827#	fp2: x/x
23828#	F_SCR1:x/x
23829#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23830#	L_SCR1:x/x
23831#	L_SCR2:first word of X packed/Unchanged
23832
# A7_str: compute SCALE = 10^abs(ISCALE) in fp1.
#
#   in:  d4 = LEN, d6 = ILOG, d7 = k-factor, upper/lower word of d5 as left
#        by A5/A13, L_SCR2(a6) = first long of the original operand,
#        USER_FPCR(a6) = caller's FPCR image.
#   out: d0 = 0, d2 = 0 or 24 (extra power of ten for A9), d3 = table offset
#        scratch, d5 low word = LAMBDA (1 if ISCALE < 0), d6 = ILOG (replaced
#        by k when k <= 0 and k >= ILOG), a1 = PTENRN/PTENRP/PTENRM,
#        a2 = RBDTBL, fp1 = 10^abs(ISCALE), FPCR = rounding mode from RBDTBL.
A7_str:
	tst.l		%d7		# test sign of k
	bgt.b		k_pos		# if k > 0, skip this
	cmp.l		%d7,%d6		# test k - ILOG
	blt.b		k_pos		# if k < ILOG, skip this
	mov.l		%d7,%d6		# if ((k<=0) & (ILOG <= k)) ILOG = k
k_pos:
	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
	addq.l		&1,%d0		# add the 1
	sub.l		%d4,%d0		# sub off LEN
	swap		%d5		# use upper word of d5 for LAMBDA
	clr.w		%d5		# set it zero initially
	clr.w		%d2		# set up d2 for very small case
	tst.l		%d0		# test sign of ISCALE
	bge.b		iscale		# if pos, skip next inst
	addq.w		&1,%d5		# if neg, set LAMBDA true
	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
	bgt.b		no_inf		# if false, skip rest
	add.l		&24,%d0		# add in 24 to iscale
	mov.l		&24,%d2		# put 24 in d2 for A9
no_inf:
	neg.l		%d0		# and take abs of ISCALE
iscale:
	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
	lsl.w		&1,%d1		# put them in bits 2:1
	add.w		%d5,%d1		# add in LAMBDA
	lsl.w		&1,%d1		# put them in bits 3:1
	tst.l		L_SCR2(%a6)	# test sign of original x
	bge.b		x_pos		# if pos, don't set bit 0
	addq.l		&1,%d1		# if neg, set bit 0
x_pos:
	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
# The byte load below only replaces bits 7:0 of d3, and d3 has not been
# written since entry on the first pass.  Clear it first so that stale
# upper bits cannot be shifted into the exception-enable byte of the FPCR.
	clr.l		%d3		# zero-extend the table byte
	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
	lsl.l		&4,%d3		# move rmode to FPCR bits 5:4
	fmov.l		%d3,%fpcr	# load bits into fpu
	lsr.l		&4,%d3		# move rmode back to bits 1:0
	tst.b		%d3		# decode new rmode for pten table
	bne.b		not_rn		# if zero, it is RN
	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
	bra.b		rmode		# exit decode
not_rn:
	lsr.b		&1,%d3		# get lsb in carry
	bcc.b		not_rp2		# if carry clear, it is RM
	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
	bra.b		rmode		# exit decode
not_rp2:
	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
rmode:
	clr.l		%d3		# clr table index
# Binary exponentiation: for every set bit n of abs(ISCALE), multiply fp1 by
# the 12-byte table entry for 10^(2^n).
e_loop2:
	lsr.l		&1,%d0		# shift next bit into carry
	bcc.b		e_next2		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
e_next2:
	add.l		&12,%d3		# inc d3 to next pwrten table entry
	tst.l		%d0		# test if ISCALE is zero
	bne.b		e_loop2		# if not, loop
23891
23892# A8. Clr INEX; Force RZ.
23893#     The operation in A3 above may have set INEX2.
23894#     RZ mode is forced for the scaling operation to insure
23895#     only one rounding error.  The grs bits are collected in
23896#     the INEX flag for use in A10.
23897#
23898# Register usage:
23899#	Input/Output
23900
# A8: start the scaling step with a clean FPSR and round-to-zero in the FPCR,
# so that INEX2 after A9 reflects only the scaling operation itself.
23901	fmov.l		&0,%fpsr	# clr INEX
23902	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
23903
23904# A9. Scale X -> Y.
23905#     The mantissa is scaled to the desired number of significant
23906#     digits.  The excess digits are collected in INEX2. If mul,
23907#     Check d2 for excess 10 exponential value.  If not zero,
23908#     the iscale value would have caused the pwrten calculation
23909#     to overflow.  Only a negative iscale can cause this, so
23910#     multiply by 10^(d2), which is now only allowed to be 24,
23911#     with a multiply by 10^8 and 10^16, which is exact since
23912#     10^24 is exact.  If the input was denormalized, the exponents
23913#     are summed in an integer register and the mantissas are
23914#     multiplied separately (see sc_mul) to avoid underflow.
23915#
23916# Register usage:
23917#	Input/Output
23918#	d0: FPCR with RZ mode/Unchanged
23919#	d2: 0 or 24/unchanged
23920#	d3: x/x
23921#	d4: LEN/Unchanged
23922#	d5: ICTR:LAMBDA
23923#	d6: ILOG/Unchanged
23924#	d7: k-factor/Unchanged
23925#	a0: ptr for original operand/final result
23926#	a1: ptr to PTENRM array/Unchanged
23927#	a2: x/x
23928#	fp0: float(ILOG)/X adjusted for SCALE (Y)
23929#	fp1: 10^ISCALE/Unchanged
23930#	fp2: x/x
23931#	F_SCR1:x/x
23932#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
23933#	L_SCR1:x/x
23934#	L_SCR2:first word of X packed/Unchanged
23935
# A9_str: form Y = abs(X) scaled by fp1 (10^abs(ISCALE)) in fp0.
#   LAMBDA = 0: Y = X / SCALE.
#   LAMBDA = 1, normal input: Y = X * SCALE, first multiplied by 10^8 and
#               10^16 when d2 is non-zero.
#   LAMBDA = 1, BINDEC_FLG set: exponents are summed by hand in d3 and the
#               mantissas multiplied with their exponents forced to 0x3fff.
23936A9_str:
23937	fmov.x		(%a0),%fp0	# load X from memory
23938	fabs.x		%fp0		# use abs(X)
23939	tst.w		%d5		# LAMBDA is in lower word of d5
23940	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
23941	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
23942	bra.w		A10_st		# branch to A10
23943
23944sc_mul:
23945	tst.b		BINDEC_FLG(%a6)	# check for denorm
23946	beq.w		A9_norm		# if norm, continue with mul
23947
23948# for DENORM, we must calculate:
23949#	fp0 = input_op * 10^ISCALE * 10^24
23950# since the input operand is a DENORM, we can't multiply it directly.
23951# so, we do the multiplication of the exponents and mantissas separately.
23952# in this way, we avoid underflow on intermediate stages of the
23953# multiplication and guarantee a result without exception.
23954	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
23955
23956	mov.w		(%sp),%d3	# grab exponent
23957	andi.w		&0x7fff,%d3	# clear sign
23958	ori.w		&0x8000,(%a0)	# make DENORM exp negative
23959	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
23960	subi.w		&0x3fff,%d3	# subtract BIAS
23961	add.w		36(%a1),%d3	# add exponent word of the 10^8 entry
23962	subi.w		&0x3fff,%d3	# subtract BIAS
23963	add.w		48(%a1),%d3	# add exponent word of the 10^16 entry
23964	subi.w		&0x3fff,%d3	# subtract BIAS
23965
23966	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
23967
23968	andi.w		&0x8000,(%sp)	# keep sign
23969	or.w		%d3,(%sp)	# insert new exponent
23970	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
23971	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
23972	mov.l		0x4(%a0),-(%sp)
23973	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23974	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
23975	fmul.x		(%sp)+,%fp0	# times 10^ISCALE carrying the summed exponent
23976
23977#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
23978#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
23979	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
23980	mov.l		36+4(%a1),-(%sp)
23981	mov.l		&0x3fff0000,-(%sp) # force exp to zero
23982	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
23983	mov.l		48+4(%a1),-(%sp)
23984	mov.l		&0x3fff0000,-(%sp)# force exp to zero
# NOTE(review): the 10^16 copy was pushed last, so the first pop below
# multiplies by it; the two trailing comments look swapped.  The product is
# the same either way.
23985	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
23986	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
23987	bra.b		A10_st
23988
# NOTE(review): this label branches to itself, so a negative combined
# exponent in the DENORM path above spins here forever; no recovery path is
# visible in this routine.
23989sc_mul_err:
23990	bra.b		sc_mul_err
23991
23992A9_norm:
23993	tst.w		%d2		# test for small exp case
23994	beq.b		A9_con		# if zero, continue as normal
23995	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
23996	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
23997A9_con:
23998	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
23999
24000# A10. Or in INEX.
24001#      If INEX is set, round error occurred.  This is compensated
24002#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
24003#
24004# Register usage:
24005#	Input/Output
24006#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
24007#	d2: x/x
24008#	d3: x/x
24009#	d4: LEN/Unchanged
24010#	d5: ICTR:LAMBDA
24011#	d6: ILOG/Unchanged
24012#	d7: k-factor/Unchanged
24013#	a0: ptr for original operand/final result
24014#	a1: ptr to PTENxx array/Unchanged
24015#	a2: x/ptr to FP_SCR1(a6)
24016#	fp0: Y/Y with lsb adjusted
24017#	fp1: 10^ISCALE/Unchanged
24018#	fp2: x/x
24019
# A10_st: store Y to FP_SCR1(a6) and leave a2 pointing at it.  If the scaling
# in A9 set FPSR bit 9, OR a 1 into the lowest mantissa bit of Y and reload
# fp0, so the lost precision stays visible to the rounding in A12.
24020A10_st:
24021	fmov.l		%fpsr,%d0	# get FPSR
24022	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
24023	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
24024	btst		&9,%d0		# check if INEX2 set
24025	beq.b		A11_st		# if clear, skip rest
24026	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
24027	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
24028
24029
24030# A11. Restore original FPCR; set size ext.
24031#      Perform FINT operation in the user's rounding mode.  Keep
24032#      the size to extended.  The sintdo entry point in the sint
24033#      routine expects the FPCR value to be in USER_FPCR for
24034#      mode and precision.  The original FPCR is saved in L_SCR1.
24035
# A11_st: keep a copy of USER_FPCR in L_SCR1, then mask USER_FPCR down to
# bits 5:4 (0x30) so only the rounding-mode field is left for A12.
24036A11_st:
24037	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
24038	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
24039#					;block exceptions
24040
24041
24042# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24043#      An inline fint.x is used (the call to sintdo is commented out).  The output is in fp0.
24044#
24045# Register usage:
24046#	Input/Output
24047#	d0: FPSR with AINEX cleared/FPCR with size set to ext
24048#	d2: x/x/scratch
24049#	d3: x/x
24050#	d4: LEN/Unchanged
24051#	d5: ICTR:LAMBDA/Unchanged
24052#	d6: ILOG/Unchanged
24053#	d7: k-factor/Unchanged
24054#	a0: ptr for original operand/src ptr for sintdo
24055#	a1: ptr to PTENxx array/Unchanged
24056#	a2: ptr to FP_SCR1(a6)/Unchanged
24057#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24058#	fp0: Y/YINT
24059#	fp1: 10^ISCALE/Unchanged
24060#	fp2: x/x
24061#	F_SCR1:x/x
24062#	F_SCR2:Y adjusted for inex/Y with original exponent
24063#	L_SCR1:x/original USER_FPCR
24064#	L_SCR2:first word of X packed/Unchanged
24065
# A12_st: YINT = fint(Y), with the sign of the original operand applied,
# rounded in the mode left in USER_FPCR by A11.  d0-d1/a0-a1, L_SCR1, L_SCR2
# and USER_FPSR are pushed around the operation and popped afterwards.
24066A12_st:
24067	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
24068	mov.l	L_SCR1(%a6),-(%sp)
24069	mov.l	L_SCR2(%a6),-(%sp)
24070
24071	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
24072	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
24073	tst.l		L_SCR2(%a6)	# test sign of original operand
24074	bge.b		do_fint12		# if pos, use Y
24075	or.l		&0x80000000,(%a0)	# if neg, use -Y
24076do_fint12:
24077	mov.l	USER_FPSR(%a6),-(%sp)	# keep a copy of USER_FPSR on the stack
24078#	bsr	sintdo		# sint routine returns int in fp0
24079
24080	fmov.l	USER_FPCR(%a6),%fpcr	# masked FPCR from A11: rounding mode only
24081	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
24082##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
24083##	andi.l		&0x00000030,%d0
24084##	fmov.l		%d0,%fpcr
24085	fint.x		FP_SCR1(%a6),%fp0	# do fint()
24086	fmov.l	%fpsr,%d0
24087	or.w	%d0,FPSR_EXCEPT(%a6)	# merge low word of the new FPSR
24088##	fmov.l		&0x0,%fpcr
24089##	fmov.l		%fpsr,%d0		# don't keep ccodes
24090##	or.w		%d0,FPSR_EXCEPT(%a6)
24091
24092	mov.b	(%sp),USER_FPSR(%a6)	# restore only the first byte of USER_FPSR
24093	add.l	&4,%sp			# drop the saved USER_FPSR long
24094
24095	mov.l	(%sp)+,L_SCR2(%a6)
24096	mov.l	(%sp)+,L_SCR1(%a6)
24097	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
24098
24099	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
24100	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
24101
24102# A13. Check for LEN digits.
24103#      If the int operation results in more than LEN digits,
24104#      or less than LEN -1 digits, adjust ILOG and repeat from
24105#      A6.  This test occurs only on the first pass.  If the
24106#      result is exactly 10^LEN, decrement ILOG and divide
24107#      the mantissa by 10.  The calculation of 10^LEN cannot
24108#      be inexact, since all powers of ten up to 10^27 are exact
24109#      in extended precision, so the use of a previous power-of-ten
24110#      table will introduce no error.
24111#
24112#
24113# Register usage:
24114#	Input/Output
24115#	d0: FPCR with size set to ext/scratch final = 0
24116#	d2: x/x
24117#	d3: x/scratch final = x
24118#	d4: LEN/LEN adjusted
24119#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24120#	d6: ILOG/ILOG adjusted
24121#	d7: k-factor/Unchanged
24122#	a0: pointer into memory for packed bcd string formation
24123#	a1: ptr to PTENxx array/Unchanged
24124#	a2: ptr to FP_SCR1(a6)/Unchanged
24125#	fp0: int portion of Y/abs(YINT) adjusted
24126#	fp1: 10^ISCALE/Unchanged
24127#	fp2: x/10^LEN
24128#	F_SCR1:x/x
24129#	F_SCR2:Y with original exponent/Unchanged
24130#	L_SCR1:original USER_FPCR/Unchanged
24131#	L_SCR2:first word of X packed/Unchanged
24132
# A13_st: check that abs(YINT) has the expected number of digits.
#   First pass (ICTR = 0): build 10^(LEN-1) in fp2.  If abs(YINT) is below
#   it (normal input only) or above 10^LEN, adjust ILOG, set ICTR = 1, select
#   RM rounding and repeat from A6_str.  If it equals 10^LEN, divide by ten
#   and bump ILOG.
#   Repeat pass (ICTR != 0): only the abs(YINT) == 10^LEN case is handled.
# Falls through or branches to A14_st with fp2 = 10^LEN.
24133A13_st:
24134	swap		%d5		# put ICTR in lower word of d5
24135	tst.w		%d5		# check if ICTR = 0
24136	bne		not_zr		# if non-zero, go to second test
24137#
24138# Compute 10^(LEN-1)
24139#
24140	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24141	mov.l		%d4,%d0		# put LEN in d0
24142	subq.l		&1,%d0		# d0 = LEN -1
24143	clr.l		%d3		# clr table index
24144l_loop:
24145	lsr.l		&1,%d0		# shift next bit into carry
24146	bcc.b		l_next		# if zero, skip the mul
24147	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24148l_next:
24149	add.l		&12,%d3		# inc d3 to next pwrten table entry
24150	tst.l		%d0		# test if LEN is zero
24151	bne.b		l_loop		# if not, loop
24152#
24153# 10^(LEN-1) is computed for this test and A14.  If the input was
24154# denormalized, check only the case in which YINT > 10^LEN.
24155#
24156	tst.b		BINDEC_FLG(%a6)	# check if input was norm
24157	beq.b		A13_con		# if norm, continue with checking
24158	fabs.x		%fp0		# take abs of YINT
24159	bra		test_2
24160#
24161# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24162#
24163A13_con:
24164	fabs.x		%fp0		# take abs of YINT
24165	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
24166	fbge.w		test_2		# if greater or equal, do next test
24167	subq.l		&1,%d6		# subtract 1 from ILOG
24168	mov.w		&1,%d5		# set ICTR
24169	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24170	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN (fp2 is rebuilt at not_zr)
24171	bra.w		A6_str		# return to A6 and recompute YINT
24172test_2:
24173	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
24174	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
24175	fblt.w		A14_st		# if less, all is ok, go to A14
24176	fbgt.w		fix_ex		# if greater, fix and redo
24177	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
24178	addq.l		&1,%d6		# and inc ILOG
24179	bra.b		A14_st		# and continue elsewhere
24180fix_ex:
24181	addq.l		&1,%d6		# increment ILOG by 1
24182	mov.w		&1,%d5		# set ICTR
24183	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
24184	bra.w		A6_str		# return to A6 and recompute YINT
24185#
24186# Since ICTR <> 0, we have already been through one adjustment,
24187# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24188# 10^LEN is again computed using whatever table is in a1 since the
24189# value calculated cannot be inexact.
24190#
24191not_zr:
24192	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
24193	mov.l		%d4,%d0		# put LEN in d0
24194	clr.l		%d3		# clr table index
24195z_loop:
24196	lsr.l		&1,%d0		# shift next bit into carry
24197	bcc.b		z_next		# if zero, skip the mul
24198	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
24199z_next:
24200	add.l		&12,%d3		# inc d3 to next pwrten table entry
24201	tst.l		%d0		# test if LEN is zero
24202	bne.b		z_loop		# if not, loop
24203	fabs.x		%fp0		# get abs(YINT)
24204	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
24205	fbneq.w		A14_st		# if not, skip this
24206	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
24207	addq.l		&1,%d6		# and inc ILOG by 1
24208	addq.l		&1,%d4		# and inc LEN
24209	fmul.s		FTEN(%pc),%fp2	# LEN was bumped, so make fp2 = 10^LEN again
24210
24211# A14. Convert the mantissa to bcd.
24212#      The binstr routine is used to convert the LEN digit
24213#      mantissa to bcd in memory.  The input to binstr is
24214#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24215#      such that the decimal point is to the left of bit 63.
24216#      The bcd digits are stored in the correct position in
24217#      the final string area in memory.
24218#
24219#
24220# Register usage:
24221#	Input/Output
24222#	d0: x/LEN call to binstr - final is 0
24223#	d1: x/0
24224#	d2: x/ms 32-bits of mant of abs(YINT)
24225#	d3: x/ls 32-bits of mant of abs(YINT)
24226#	d4: LEN/Unchanged
24227#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24228#	d6: ILOG
24229#	d7: k-factor/Unchanged
24230#	a0: pointer into memory for packed bcd string formation
24231#	    /ptr to first mantissa byte in result string
24232#	a1: ptr to PTENxx array/Unchanged
24233#	a2: ptr to FP_SCR1(a6)/Unchanged
24234#	fp0: int portion of Y/abs(YINT) adjusted
24235#	fp1: 10^ISCALE/Unchanged
24236#	fp2: 10^LEN/Unchanged
24237#	F_SCR1:x/Work area for final result
24238#	F_SCR2:Y with original exponent/Unchanged
24239#	L_SCR1:original USER_FPCR/Unchanged
24240#	L_SCR2:first word of X packed/Unchanged
24241
# A14_st: turn abs(YINT) into a fraction (divide by fp2 = 10^LEN under RZ),
# pull its 64-bit mantissa into d2:d3, shift it right according to the
# exponent, round at bit 7, and hand it to binstr with d0 = LEN and a0
# pointing 3 bytes into FP_SCR0(a6).
24242A14_st:
24243	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
24244	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
24245	lea.l		FP_SCR0(%a6),%a0
24246	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
24247	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
24248	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
24249	clr.l		4(%a0)		# zero word 2 of FP_RES
24250	clr.l		8(%a0)		# zero word 3 of FP_RES
24251	mov.l		(%a0),%d0	# move exponent to d0
24252	swap		%d0		# put exponent in lower word
24253	beq.b		no_sft		# if zero, don't shift
24254	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
24255	tst.l		%d0		# check if > 1
24256	bgt.b		no_sft		# if so, don't shift
24257	neg.l		%d0		# make exp positive
# dbf runs the loop body d0 + 1 times.
24258m_loop:
24259	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
24260	roxr.l		&1,%d3		# the number of places
24261	dbf.w		%d0,m_loop	# given in d0
24262no_sft:
24263	tst.l		%d2		# check for mantissa of zero
24264	bne.b		no_zr		# if not, go on
24265	tst.l		%d3		# continue zero check
24266	beq.b		zer_m		# if zero, go directly to binstr
24267no_zr:
24268	clr.l		%d1		# put zero in d1 for addx
24269	add.l		&0x00000080,%d3	# inc at bit 7
24270	addx.l		%d1,%d2		# continue inc
24271	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24272zer_m:
24273	mov.l		%d4,%d0		# put LEN in d0 for binstr call
24274	addq.l		&3,%a0		# a0 points to M16 byte in result
24275	bsr		binstr		# call binstr to convert mant
24276
24277
24278# A15. Convert the exponent to bcd.
24279#      As in A14 above, the exp is converted to bcd and the
24280#      digits are stored in the final string.
24281#
24282#      Digits are stored in L_SCR1(a6) on return from binstr as:
24283#
24284#  	 32               16 15                0
24285#	-----------------------------------------
24286#  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
24287#	-----------------------------------------
24288#
24289# And are moved into their proper places in FP_SCR0.  If digit e4
24290# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
24291# written as specified in the 881/882 manual for packed decimal.
24292#
24293# Register usage:
24294#	Input/Output
24295#	d0: x/LEN call to binstr - final is 0
24296#	d1: x/scratch (0);shift count for final exponent packing
24297#	d2: x/ms 32-bits of exp fraction/scratch
24298#	d3: x/ls 32-bits of exp fraction
24299#	d4: LEN/Unchanged
24300#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24301#	d6: ILOG
24302#	d7: k-factor/Unchanged
24303#	a0: ptr to result string/ptr to L_SCR1(a6)
24304#	a1: ptr to PTENxx array/Unchanged
24305#	a2: ptr to FP_SCR1(a6)/Unchanged
24306#	fp0: abs(YINT) adjusted/float(ILOG)
24307#	fp1: 10^ISCALE/Unchanged
24308#	fp2: 10^LEN/Unchanged
24309#	F_SCR1:Work area for final result/BCD result
24310#	F_SCR2:Y with original exponent/ILOG/10^4
24311#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24312#	L_SCR2:first word of X packed/Unchanged
24313
# A15_st: pick the exponent magnitude to print (abs(ILOG), or a forced 1 or
# 4933 when the mantissa is zero), divide it by the table entry at 24(a1)
# (10^4) to make a fraction, convert four digits with binstr into L_SCR1(a6)
# and insert them into FP_SCR0(a6).  A non-zero fourth digit raises
# OPERR/AIOP.
24314A15_st:
24315	tst.b		BINDEC_FLG(%a6)	# check for denorm
24316	beq.b		not_denorm
24317	ftest.x		%fp0		# test for zero
24318	fbeq.w		den_zero	# if zero, use k-factor or 4933
24319	fmov.l		%d6,%fp0	# float ILOG
24320	fabs.x		%fp0		# get abs of ILOG
24321	bra.b		convrt
24322den_zero:
24323	tst.l		%d7		# check sign of the k-factor
24324	blt.b		use_ilog	# if negative, use ILOG
24325	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
24326	bra.b		convrt		# do it
24327use_ilog:
24328	fmov.l		%d6,%fp0	# float ILOG
24329	fabs.x		%fp0		# get abs of ILOG
24330	bra.b		convrt
24331not_denorm:
24332	ftest.x		%fp0		# test for zero
24333	fbneq.w		not_zero	# if non-zero, use ILOG; else force exponent
24334	fmov.s		FONE(%pc),%fp0	# force exponent to 1
24335	bra.b		convrt		# do it
24336not_zero:
24337	fmov.l		%d6,%fp0	# float ILOG
24338	fabs.x		%fp0		# get abs of ILOG
24339convrt:
24340	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
24341	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
24342	mov.l		4(%a2),%d2	# move word 2 to d2
24343	mov.l		8(%a2),%d3	# move word 3 to d3
24344	mov.w		(%a2),%d0	# move exp to d0
24345	beq.b		x_loop_fin	# if zero, skip the shift
24346	sub.w		&0x3ffd,%d0	# subtract off bias
24347	neg.w		%d0		# make exp positive
24348x_loop:
24349	lsr.l		&1,%d2		# shift d2:d3 right
24350	roxr.l		&1,%d3		# the number of places
24351	dbf.w		%d0,x_loop	# given in d0
24352x_loop_fin:
24353	clr.l		%d1		# put zero in d1 for addx
24354	add.l		&0x00000080,%d3	# inc at bit 7
24355	addx.l		%d1,%d2		# continue inc
24356	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
24357	mov.l		&4,%d0		# put 4 in d0 for binstr call
24358	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
24359	bsr		binstr		# call binstr to convert exp
24360	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
24361	mov.l		&12,%d1		# use d1 for shift count
24362	lsr.l		%d1,%d0		# shift d0 right by 12
24363	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
24364	lsr.l		%d1,%d0		# shift d0 right by 12
24365	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
24366	tst.b		%d0		# check if e4 is zero
24367	beq.b		A16_st		# if zero, skip rest
24368	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
24369
24370
24371# A16. Write sign bits to final string.
24372#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24373#
24374# Register usage:
24375#	Input/Output
24376#	d0: x/scratch - final is x
24377#	d2: x/x
24378#	d3: x/x
24379#	d4: LEN/Unchanged
24380#	d5: ICTR:LAMBDA/LAMBDA:ICTR
24381#	d6: ILOG/ILOG adjusted
24382#	d7: k-factor/Unchanged
24383#	a0: ptr to L_SCR1(a6)/Unchanged
24384#	a1: ptr to PTENxx array/Unchanged
24385#	a2: ptr to FP_SCR1(a6)/Unchanged
24386#	fp0: float(ILOG)/Unchanged
24387#	fp1: 10^ISCALE/Unchanged
24388#	fp2: 10^LEN/Unchanged
24389#	F_SCR1:BCD result with correct signs
24390#	F_SCR2:ILOG/10^4
24391#	L_SCR1:Exponent digits on return from binstr
24392#	L_SCR2:first word of X packed/Unchanged
24393
# A16_st: write the two sign bits into the top of FP_SCR0(a6).  SM (value 2)
# comes from the sign of the original operand saved in L_SCR2, SE (value 1)
# from the sign of ILOG in d6.  Then clear the FPSR, restore the registers
# saved at bindec entry and return.
24394A16_st:
24395	clr.l		%d0		# clr d0 for collection of signs
24396	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
24397	tst.l		L_SCR2(%a6)	# check sign of original mantissa
24398	bge.b		mant_p		# if pos, don't set SM
24399	mov.l		&2,%d0		# move 2 in to d0 for SM
24400mant_p:
24401	tst.l		%d6		# check sign of ILOG
24402	bge.b		wr_sgn		# if pos, don't set SE
24403	addq.l		&1,%d0		# set bit 0 in d0 for SE
24404wr_sgn:
24405	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
24406
24407# Clean up and restore all registers used.
24408
24409	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
24410	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
24411	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
24412	rts
24413
# PTENRN: powers of ten 10^(2^n), n = 0..12, in extended precision, selected
# by bindec when the table rounding mode decodes to RN.  Entries are 12
# bytes each (one sign/exponent long, two mantissa longs) and are addressed
# by byte offset, so the layout must not change.
	global		PTENRN
PTENRN:
	long		0x40020000		# 10 ^ 1
	long		0xA0000000,0x00000000
	long		0x40050000		# 10 ^ 2
	long		0xC8000000,0x00000000
	long		0x400C0000		# 10 ^ 4
	long		0x9C400000,0x00000000
	long		0x40190000		# 10 ^ 8
	long		0xBEBC2000,0x00000000
	long		0x40340000		# 10 ^ 16
	long		0x8E1BC9BF,0x04000000
	long		0x40690000		# 10 ^ 32
	long		0x9DC5ADA8,0x2B70B59E
	long		0x40D30000		# 10 ^ 64
	long		0xC2781F49,0xFFCFA6D5
	long		0x41A80000		# 10 ^ 128
	long		0x93BA47C9,0x80E98CE0
	long		0x43510000		# 10 ^ 256
	long		0xAA7EEBFB,0x9DF9DE8E
	long		0x46A30000		# 10 ^ 512
	long		0xE319A0AE,0xA60E91C7
	long		0x4D480000		# 10 ^ 1024
	long		0xC9767586,0x81750C17
	long		0x5A920000		# 10 ^ 2048
	long		0x9E8B3B5D,0xC53D5DE5
	long		0x75250000		# 10 ^ 4096
	long		0xC4605202,0x8A20979B
24429
# PTENRP: powers of ten 10^(2^n), n = 0..12, in extended precision, selected
# by bindec when the table rounding mode decodes to RP.  Entries are 12
# bytes each (one sign/exponent long, two mantissa longs) and are addressed
# by byte offset, so the layout must not change.
	global		PTENRP
PTENRP:
	long		0x40020000		# 10 ^ 1
	long		0xA0000000,0x00000000
	long		0x40050000		# 10 ^ 2
	long		0xC8000000,0x00000000
	long		0x400C0000		# 10 ^ 4
	long		0x9C400000,0x00000000
	long		0x40190000		# 10 ^ 8
	long		0xBEBC2000,0x00000000
	long		0x40340000		# 10 ^ 16
	long		0x8E1BC9BF,0x04000000
	long		0x40690000		# 10 ^ 32
	long		0x9DC5ADA8,0x2B70B59E
	long		0x40D30000		# 10 ^ 64
	long		0xC2781F49,0xFFCFA6D6
	long		0x41A80000		# 10 ^ 128
	long		0x93BA47C9,0x80E98CE0
	long		0x43510000		# 10 ^ 256
	long		0xAA7EEBFB,0x9DF9DE8E
	long		0x46A30000		# 10 ^ 512
	long		0xE319A0AE,0xA60E91C7
	long		0x4D480000		# 10 ^ 1024
	long		0xC9767586,0x81750C18
	long		0x5A920000		# 10 ^ 2048
	long		0x9E8B3B5D,0xC53D5DE5
	long		0x75250000		# 10 ^ 4096
	long		0xC4605202,0x8A20979B
24445
# PTENRM: powers of ten 10^(2^n), n = 0..12, in extended precision, selected
# by bindec when the table rounding mode decodes to RM.  bindec also reads
# fixed offsets 24, 36 and 48 (10^4, 10^8, 10^16) from whichever table a1
# points at.  Entries are 12 bytes each, so the layout must not change.
	global		PTENRM
PTENRM:
	long		0x40020000		# 10 ^ 1
	long		0xA0000000,0x00000000
	long		0x40050000		# 10 ^ 2
	long		0xC8000000,0x00000000
	long		0x400C0000		# 10 ^ 4
	long		0x9C400000,0x00000000
	long		0x40190000		# 10 ^ 8
	long		0xBEBC2000,0x00000000
	long		0x40340000		# 10 ^ 16
	long		0x8E1BC9BF,0x04000000
	long		0x40690000		# 10 ^ 32
	long		0x9DC5ADA8,0x2B70B59D
	long		0x40D30000		# 10 ^ 64
	long		0xC2781F49,0xFFCFA6D5
	long		0x41A80000		# 10 ^ 128
	long		0x93BA47C9,0x80E98CDF
	long		0x43510000		# 10 ^ 256
	long		0xAA7EEBFB,0x9DF9DE8D
	long		0x46A30000		# 10 ^ 512
	long		0xE319A0AE,0xA60E91C6
	long		0x4D480000		# 10 ^ 1024
	long		0xC9767586,0x81750C17
	long		0x5A920000		# 10 ^ 2048
	long		0x9E8B3B5D,0xC53D5DE4
	long		0x75250000		# 10 ^ 4096
	long		0xC4605202,0x8A20979A
24461
24462#########################################################################
24463# binstr(): Converts a 64-bit binary integer to bcd.			#
24464#									#
24465# INPUT *************************************************************** #
24466#	d2:d3 = 64-bit binary integer					#
24467#	d0    = desired length (LEN)					#
24468#	a0    = pointer to start in memory for bcd characters		#
24469#          	(This pointer must point to byte 4 of the first		#
24470#          	 lword of the packed decimal memory string.)		#
24471#									#
24472# OUTPUT ************************************************************** #
24473#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
24474#									#
24475# ALGORITHM ***********************************************************	#
24476#	The 64-bit binary is assumed to have a decimal point before	#
24477#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
24478#	shift and a mul by 8 shift.  The bits shifted out of the	#
24479#	msb form a decimal digit.  This process is iterated until	#
24480#	LEN digits are formed.						#
24481#									#
24482# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
24483#     digit formed will be assumed the least significant.  This is	#
24484#     to force the first byte formed to have a 0 in the upper 4 bits.	#
24485#									#
24486# A2. Beginning of the loop:						#
24487#     Copy the fraction in d2:d3 to d4:d5.				#
24488#									#
24489# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
24490#     extracts and shifts.  The three msbs from d2 will go into d1.	#
24491#									#
24492# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
24493#     will be collected by the carry.					#
24494#									#
24495# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
24496#     into d2:d3.  D1 will contain the bcd digit formed.		#
24497#									#
24498# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
24499#     zero, it is the ls digit.  Put the digit in its place in the	#
24500#     upper word of d7.  If it is the ls digit, write the byte		#
24501#     from d7 to memory.						#
24502#									#
24503# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
24504#									#
24505#########################################################################
24506
24507#	Implementation Notes:
24508#
24509#	The registers are used as follows:
24510#
24511#		d0: LEN counter
24512#		d1: temp used to form the digit
24513#		d2: upper 32-bits of fraction for mul by 8
24514#		d3: lower 32-bits of fraction for mul by 8
24515#		d4: upper 32-bits of fraction for mul by 2
24516#		d5: lower 32-bits of fraction for mul by 2
24517#		d6: temp for bit-field extracts
24518#		d7: byte digit formation word;digit count {0,1}
24519#		a0: pointer into memory for packed bcd string formation
24520#
24521
24522	global		binstr
#
# binstr: write LEN packed-bcd digits of the 64-bit fraction in d2:d3 to
# memory through (a0)+, two digits per byte.  Each pass multiplies the
# fraction by 10 (x8 plus x2); the bits carried out of the top of d2 are
# collected in d1 and form the digit.
#   In : d0 = LEN, d2:d3 = fraction, a0 = first byte to write
#   Out: a0 advanced past the bytes written; d0-d7 are saved on entry
#        and restored on exit.
# d7 is used as two words: the low word is a flag (0 = next digit is the
# ms digit of a byte, non-zero = next digit is the ls digit) and the
# upper word holds a pending ms digit.
# NOTE(review): LEN = 0 is not guarded; the loop body runs before the
# first dbf test, so callers are presumably expected to pass LEN >= 1.
#
24523binstr:
24524	movm.l		&0xff00,-(%sp)	#  save {%d0-%d7}
24525
24526#
24527# A1: Init d7
24528#
24529	mov.l		&1,%d7		# flag word = 1, pending digit = 0
24530	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
24531#
24532# A2. Copy d2:d3 to d4:d5.  Start loop.
24533#
24534loop:
24535	mov.l		%d2,%d4		# copy the fraction before muls
24536	mov.l		%d3,%d5		# to d4:d5
24537#
24538# A3. Multiply d2:d3 by 8; extract msbs into d1.
24539#
24540	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
24541	asl.l		&3,%d2		# shift d2 left by 3 places
24542	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
24543	asl.l		&3,%d3		# shift d3 left by 3 places
24544	or.l		%d6,%d2		# or in msbs from d3 into d2
24545#
24546# A4. Multiply d4:d5 by 2; add carry out to d1.
24547#
24548	asl.l		&1,%d5		# mul d5 by 2; msb of d5 goes to X
24549	roxl.l		&1,%d4		# mul d4 by 2 with X in; msb to X
24550	swap		%d6		# put 0 in d6 lower word
24551	addx.w		%d6,%d1		# add in extend from mul by 2
24552#
24553# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
24554#
24555	add.l		%d5,%d3		# add lower 32 bits
24556	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24557	addx.l		%d4,%d2		# add with extend upper 32 bits
24558	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
24559	addx.w		%d6,%d1		# add in extend from add to d1
24560	swap		%d6		# with d6 = 0; put 0 in upper word
24561#
24562# A6. Test d7 and branch.
24563#
24564	tst.w		%d7		# flag word zero: digit is an ms digit
24565	beq.b		first_d		# if non-zero, form byte & write
24566sec_d:
24567	swap		%d7		# bring pending ms digit to low word
24568	asl.w		&4,%d7		# ms digit into upper 4 bits of byte
24569	add.w		%d1,%d7		# add in ls digit
24570	mov.b		%d7,(%a0)+	# store the finished byte in memory
24571	swap		%d7		# bring flag word back to low word
24572	clr.w		%d7		# flag = 0: no digit pending
24573	dbf.w		%d0,loop	# do loop some more!
24574	bra.b		end_bstr	# finished, so exit
24575first_d:
24576	swap		%d7		# bring digit word to low word
24577	mov.w		%d1,%d7		# put new digit in it
24578	swap		%d7		# digit to upper word, flag to low word
24579	addq.w		&1,%d7		# flag = 1: an ms digit is pending
24580	dbf.w		%d0,loop	# do loop some more!
24581	swap		%d7		# LEN exhausted with a digit pending
24582	lsl.w		&4,%d7		# move it to upper 4 bits
24583	mov.b		%d7,(%a0)+	# store it; lower 4 bits are zero
24584#
24585# Clean up and return.  The digits have been stored through a0.
24586#
24587end_bstr:
24588	movm.l		(%sp)+,&0xff	#  restore {%d0-%d7}
24589	rts
24590
24591#########################################################################
24592# XDEF ****************************************************************	#
24593#	facc_in_b(): dmem_read_byte failed				#
24594#	facc_in_w(): dmem_read_word failed				#
24595#	facc_in_l(): dmem_read_long failed				#
24596#	facc_in_d(): dmem_read of dbl prec failed			#
24597#	facc_in_x(): dmem_read of ext prec failed			#
24598#									#
24599#	facc_out_b(): dmem_write_byte failed				#
24600#	facc_out_w(): dmem_write_word failed				#
24601#	facc_out_l(): dmem_write_long failed				#
24602#	facc_out_d(): dmem_write of dbl prec failed			#
24603#	facc_out_x(): dmem_write of ext prec failed			#
24604#									#
24605# XREF ****************************************************************	#
24606#	_real_access() - exit through access error handler		#
24607#									#
24608# INPUT ***************************************************************	#
24609#	None								#
24610# 									#
24611# OUTPUT **************************************************************	#
24612#	None								#
24613#									#
24614# ALGORITHM ***********************************************************	#
24615# 	Flow jumps here when an FP data fetch call gets an error 	#
24616# result. This means the operating system wants an access error frame	#
24617# made out of the current exception stack frame. 			#
24618#	So, we first call restore() which makes sure that any updated	#
24619# -(an)+ register gets returned to its pre-exception value and then	#
24620# we change the stack to an access error stack frame.			#
24621#									#
24622#########################################################################
24623
# Read-fault entry points, one per operand size.  Each stub loads d0 with
# the operand size in bytes for restore(), then parks a 16-bit value in
# EXC_VOFF(%a6); facc_finish later copies that word into the upper half
# of the FSLW longword of the access error frame.  All five values have
# 0x0100 set and differ only in the 0x0060 bits (presumably the FSLW
# read and SIZE fields -- see the MC68060 User's Manual).
24624facc_in_b:
24625	movq.l		&0x1,%d0			# one byte
24626	bsr.w		restore				# fix An
24627
24628	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
# bra.w here, bra.b in the other stubs: presumably facc_finish is out of
# byte-displacement range from this first stub only.
24629	bra.w		facc_finish
24630
24631facc_in_w:
24632	movq.l		&0x2,%d0			# two bytes
24633	bsr.w		restore				# fix An
24634
24635	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
24636	bra.b		facc_finish
24637
24638facc_in_l:
24639	movq.l		&0x4,%d0			# four bytes
24640	bsr.w		restore				# fix An
24641
24642	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
24643	bra.b		facc_finish
24644
24645facc_in_d:
24646	movq.l		&0x8,%d0			# eight bytes
24647	bsr.w		restore				# fix An
24648
24649	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
24650	bra.b		facc_finish
24651
24652facc_in_x:
24653	movq.l		&0xc,%d0			# twelve bytes
24654	bsr.w		restore				# fix An
24655
24656	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW (same value as facc_in_d)
24657	bra.b		facc_finish
24658
24659################################################################
24660
# Write-fault entry points, one per operand size.  Each stub loads d0
# with the operand size in bytes for restore(), then parks a 16-bit value
# in EXC_VOFF(%a6); facc_finish later copies that word into the upper
# half of the FSLW longword of the access error frame.  All values have
# 0x0080 set instead of the 0x0100 used by the facc_in_* stubs and differ
# only in the 0x0060 bits (presumably the FSLW write and SIZE fields --
# see the MC68060 User's Manual).
24661facc_out_b:
24662	movq.l		&0x1,%d0			# one byte
24663	bsr.w		restore				# restore An
24664
24665	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
24666	bra.b		facc_finish
24667
24668facc_out_w:
24669	movq.l		&0x2,%d0			# two bytes
24670	bsr.w		restore				# restore An
24671
24672	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
24673	bra.b		facc_finish
24674
24675facc_out_l:
24676	movq.l		&0x4,%d0			# four bytes
24677	bsr.w		restore				# restore An
24678
24679	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
24680	bra.b		facc_finish
24681
24682facc_out_d:
24683	movq.l		&0x8,%d0			# eight bytes
24684	bsr.w		restore				# restore An
24685
24686	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
24687	bra.b		facc_finish
24688
# facc_out_x: write fault on a twelve-byte operand.  Loads d0 with the
# operand size for restore(), parks the FSLW upper word in EXC_VOFF(%a6)
# and falls through into facc_finish.
24689facc_out_x:
24690	movq.l		&0xc,%d0			# twelve bytes
24691	bsr.w		restore				# restore An
24692
24693	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW (same value as facc_out_d)
24694
24695# here's where we actually create the access error frame from the
24696# current exception stack frame.
#
# On entry EXC_VOFF(%a6) holds the word that becomes the upper half of
# the FSLW.  fp0-fp1, the fp control registers and d0-d1/a0-a1 are
# reloaded from the save area and the a6 frame is unlinked.  Going by the
# comments below, the frame then at (%sp) is SR, PC, voff, EA; it is
# grown by one longword and rewritten as SR, PC, 0x4008, EA, FSLW before
# leaving through _real_access.
24697facc_finish:
24698	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24699
24700	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
24701	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24702	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
24703
24704	unlk		%a6
24705
24706	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
24707	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC); 0x6(%sp) = parked FSLW word
24708	mov.l		0xc(%sp),0x8(%sp)	# store EA
24709	mov.l		&0x00000001,0xc(%sp)	# store FSLW
24710	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size): parked word -> upper half
24711	mov.w		&0x4008,0x6(%sp)	# store voff
24712
24713	btst		&0x5,(%sp)		# supervisor or user mode?
24714	beq.b		facc_out2		# user
24715	bset		&0x2,0xd(%sp)		# set supervisor TM bit
24716
24717facc_out2:
24718	bra.l		_real_access
24719
24720##################################################################
24721
24722# if the effective addressing mode was predecrement or postincrement,
24723# the emulation has already changed its value to the correct post-
24724# instruction value. but since we're exiting to the access error
24725# handler, then AN must be returned to its pre-instruction value.
24726# we do that here.
#
# In : d0 = operand size in bytes; the low byte of EXC_OPWORD(%a6) holds
#      the <ea> mode in bits 5-3 and the register number in bits 2-0.
# Out: for (An)+ the selected An is moved back by d0; for -(An) it is
#      moved forward by d0 (d0 is negated first and is left negated).
#      Any other mode returns with nothing changed.
# Clobbers d1; the a7 case may also overwrite a0.
# a0/a1 are fixed in their stacked copies at EXC_DREGS+0x8/+0xc, a6 in
# the saved copy at (%a6), and a2-a5 directly in the live registers.
24727restore:
24728	mov.b		EXC_OPWORD+0x1(%a6),%d1
24729	andi.b		&0x38,%d1		# extract opmode
24730	cmpi.b		%d1,&0x18		# postinc?
24731	beq.w		rest_inc
24732	cmpi.b		%d1,&0x20		# predec?
24733	beq.w		rest_dec
24734	rts
24735
# dispatch on the register number through a table of 16-bit offsets
# measured from tbl_rest_inc.
24736rest_inc:
24737	mov.b		EXC_OPWORD+0x1(%a6),%d1
24738	andi.w		&0x0007,%d1		# fetch An
24739
24740	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
24741	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
24742
24743tbl_rest_inc:
24744	short		ri_a0 - tbl_rest_inc
24745	short		ri_a1 - tbl_rest_inc
24746	short		ri_a2 - tbl_rest_inc
24747	short		ri_a3 - tbl_rest_inc
24748	short		ri_a4 - tbl_rest_inc
24749	short		ri_a5 - tbl_rest_inc
24750	short		ri_a6 - tbl_rest_inc
24751	short		ri_a7 - tbl_rest_inc
24752
24753ri_a0:
24754	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
24755	rts
24756ri_a1:
24757	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
24758	rts
24759ri_a2:
24760	sub.l		%d0,%a2			# fix a2
24761	rts
24762ri_a3:
24763	sub.l		%d0,%a3			# fix a3
24764	rts
24765ri_a4:
24766	sub.l		%d0,%a4			# fix a4
24767	rts
24768ri_a5:
24769	sub.l		%d0,%a5			# fix a5
24770	rts
24771ri_a6:
24772	sub.l		%d0,(%a6)		# fix stacked a6
24773	rts
24774# if it's a fmove out instruction, we don't have to fix a7
24775# because we hadn't changed it yet. if it's an opclass two
24776# instruction (data moved in) and the exception was in supervisor
24777# mode, then a7 also wasn't updated. if it was user mode, then
24778# restore the correct a7 which is in the USP currently.
24779ri_a7:
24780	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
24781	bne.b		ri_a7_done		# out
24782
24783	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
24784	bne.b		ri_a7_done		# supervisor
24785	movc		%usp,%a0		# restore USP
24786	sub.l		%d0,%a0			# undo the adjustment (overwrites a0)
24787	movc		%a0,%usp
24788ri_a7_done:
24789	rts
24790
24791# need to invert adjustment value if the <ea> was predec
24792rest_dec:
24793	neg.l		%d0
24794	bra.b		rest_inc
24795