1/*	$NetBSD: unimpl_emul.S,v 1.2 2002/02/24 01:04:27 matt Exp $	*/
2
3/*
4 * Copyright (c) 2001 Brandon Creighton.  All rights reserved.
5 * Copyright (c) 2000 Ludd, University of Lule}, Sweden. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *      This product includes software developed at Ludd, University of
18 *      Lule}, Sweden and its contributors.
19 * 4. The name of the author may not be used to endorse or promote products
20 *    derived from this software without specific prior written permission
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34
35#include <machine/asm.h>
36#include "assym.h"
37
/*
 * Emulating inside the kernel is a security hole; it exists only to
 * debug the emulation code itself, so it stays switched off.
 */
#undef	EMULATE_INKERNEL

/*
 * Saved register operands.  touser leaves %fp pointing at a block of
 * longwords holding %r0-%r11, ap, fp, sp, pc and psl, in that order.
 */
#define	S_R0	(%fp)
#define	S_R1	4(%fp)
#define	S_R2	8(%fp)
#define	S_R3	12(%fp)
#define	S_R4	16(%fp)
#define	S_R5	20(%fp)
#define	S_R6	24(%fp)
#define	S_R7	28(%fp)
#define	S_R8	32(%fp)
#define	S_R9	36(%fp)
#define	S_R10	40(%fp)
#define	S_R11	44(%fp)
#define	S_AP	48(%fp)
#define	S_FP	52(%fp)
#define	S_SP	56(%fp)
#define	S_PC	60(%fp)
#define	S_PSL	64(%fp)

/* Condition code bits in the saved PSL. */
#define PSL_C	1
#define PSL_V	2
#define PSL_Z	4
#define PSL_N	8
#define PSL_Q  15		/* all four */
67
/*
 * unimemu: entered for an instruction trapped via SCB vector 0x18
 * (reserved opcode).  The opcode byte at the trap address selects the
 * emulation routine: 0x74 goes to emodd and 0x75 to polyd.  For any
 * other opcode %r0 is restored and we return to the caller, so the
 * fault carries on as usual.
 */
ALTENTRY(unimemu)
	pushl	%r0			# free up one scratch register
	movl	8(%sp),%r0		# trap address
	movzbl	(%r0),%r0		# opcode that generated the trap
	caseb	%r0,$0x74,$1		# index = opcode - 0x74, two entries
0:	.word	emodd-0b		# 0x74
	.word	polyd-0b		# 0x75

	/* Not an opcode handled here. */
	movl	(%sp)+,%r0		# give %r0 back
	rsb				# continue fault
81
/*
 * touser: move the emulation over to the user stack.
 *
 * Entered with bsbw from an emulation routine.  On return %fp points
 * at the saved register block described by the S_* defines, S_PC has
 * been stepped past the opcode byte, and the stack pointer sits right
 * below that block.
 *
 * Stack on entry:	 (%sp)	return address of the bsbw
 *			4(%sp)	%r0 as saved by unimemu
 *			8(%sp)	return address of the jsb into unimemu
 *		       12(%sp)	trap pc, followed by the psl
 */
#ifdef EMULATE_INKERNEL
touser:	movl	(%sp),-52(%sp)		# park the rsb address below the new frame
	movl	4(%sp),%r0		# %r0 as it was at the trap
	addl2	$12,%sp			# pop everything down to the pc/psl pair
	pushr	$0x7fff			# %r0-%r11, ap, fp, sp
	movl	%sp,%fp			# base for the S_* defines
	subl2	$4,%sp			# step down onto the parked rsb address
	incl	S_PC			# skip the opcode byte
	rsb
#else
touser:	mfpr	$PR_USP,%r0		# %r0 = user stack pointer
	movl	(%sp),-72(%r0)		# return address goes in the lowest slot
	movl	4(%sp),-68(%r0)		# already saved %r0 -> S_R0 slot
	movq	12(%sp),-8(%r0)		# pc + psl -> S_PC/S_PSL slots
	addl2	$12,%sp			# leave only pc/psl for the rei
	movl	$1f,(%sp)		# make the rei land on 1: below
	rei
1:	subl2	$8,%sp			# trap pc + psl are already in place
	pushr	$0x7ffe			# %r1-%r11, ap, fp, sp; %r0 is in place
	subl2	$8,%sp			# step over saved %r0 and return address
	movab	4(%sp),%fp		# %fp -> saved %r0
	incl	S_PC			# skip the opcode byte
	rsb
#endif
110
/*
 * goback: common exit of the emulation routines.  Every register is
 * reloaded from the saved block at %fp, then rei resumes with the saved
 * pc and psl.
 */
goback:	movl	%fp,%sp			# unwind to the saved register block
	popr	$0x7fff			# %r0-%r11, ap, fp, sp
	rei				# pc + psl
117
/*
 * getval: is used by the getval_* functions.  Gets the value specified by the
 * current operand specifier pointed to by S_PC.  It also increments S_PC.
 *
 * In:	%r6  = operand size in bytes (1, 2, 4 or 8)
 *	S_PC = address of the operand specifier
 * Out:	%r0 (and %r1 for 8 bytes) = operand value
 *	S_PC = address of the byte following the specifier
 *
 * %r2-%r6 are preserved.  Register operands are read from (and, for
 * autoincrement, updated in) the saved register block at %fp.  Addressing
 * modes without a handler run into the 0xffff word after the dispatch
 * table (or end up in die when EMULATE_INKERNEL is defined).
 */
getval:
	clrq	%r0
	pushr	$(R2+R3+R4+R5+R6)
	movl	S_PC,%r3		# %r3 walks the instruction stream
	extzv	$4,$4,(%r3),%r2	# addressing mode = high nibble
	caseb	%r2,$0,$0xf
0:	.word	getval_literal-0b		# 0-3 literal
	.word	getval_literal-0b
	.word	getval_literal-0b
	.word	getval_literal-0b
	.word	2f-0b		# 4 indexed
	.word	getval_reg-0b			# 5 register
	.word	getval_regdefer-0b		# 6 register deferred
	.word	2f-0b		# 7 autodecrement
	.word	getval_ai-0b			# 8 autoincrement
	.word	2f-0b		# 9 autoincrement deferred
	.word	getval_bytedis-0b		# A byte displacement
	.word	2f-0b		# B byte displacement deferred
	.word	2f-0b		# C word displacement
	.word	2f-0b		# D word displacement deferred
	.word	getval_longdis-0b		# E longword displacement
	.word	2f-0b		# F longword displacement deferred
#ifdef EMULATE_INKERNEL
2:	movab	0f,%r0
	movl	%r2,%r1
	brw	die
0:	.asciz	"getval: missing address mode %d\n"
#else
2:	.word 	0xffff		# reserved operand
#endif

	/*
	 * 0x00-0x03
	 * Literal mode.  Note:  getval_{d,f}float will *never* use this
	 * routine to get literal values, since they treat them differently
	 * (see those routines for details).
	 */
getval_literal:
	movzbl	(%r3)+,%r0	# the specifier byte is the value
	brw	4f

	/*
	 * 0x05
	 * Register mode.  Grab the register number, yank the value out of
	 * the saved register block.
	 */
getval_reg:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	ashl	$2,%r2,%r2		# longword index -> byte offset
	addl3	%fp,%r2,%r5		# address of the saved register
	bsbw	emul_extract
	brw	4f

	/*
	 * 0x06
	 * Register deferred mode.  Grab the register number, yank the value
	 * out, use that as the address to get the real value.
	 */
getval_regdefer:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	movl	(%r2),%r5		# register contents = operand address
	bsbw	emul_extract
	brw	4f

	/*
	 * 0x08 Autoincrement mode
	 * Get the value in the register, use that as the address of our
	 * target, then increment the register by the operand size.
	 */
getval_ai:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3

	/*
	 * In the case of the register being PC (0xf), this is called
	 * immediate mode; we can treat it the same as any other register,
	 * as long as we keep %r3 and S_PC in sync.  We do that here.
	 */
	movl 	%r3,S_PC

	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	movl	(%r2),%r5
	bsbw	emul_extract
	addl2	%r6,(%r2)		# step the register past the operand

	movl	S_PC,%r3		/* if PC did change, S_PC was changed too */
	brw	4f

	/*
	 * 0xA
	 * Byte displacement mode.  The displacement is a signed byte.  S_PC
	 * is brought up to date before the base register is read so that a
	 * PC base is the address following the displacement, which is what
	 * getaddr_byte arrives at with its +2 compensation.
	 */
getval_bytedis:
	extzv	$0,$4,(%r3),%r2	# get register
	incl	%r3
	cvtbl	(%r3)+,%r4		# sign-extended displacement
	movl	%r3,S_PC		# PC as base means the updated PC
	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	addl3	(%r2),%r4,%r5		# base + displacement
	bsbw	emul_extract
	brw	4f

	/*
	 * 0xE
	 * Longword displacement mode.  S_PC is updated first for the same
	 * reason as in byte displacement mode.
	 */
getval_longdis:
	extzv	$0,$4,(%r3),%r2	# get register
	incl	%r3
	movl	(%r3)+,%r4		# displacement
	movl	%r3,S_PC		# PC as base means the updated PC
	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	addl3	(%r2),%r4,%r5		# base + displacement
	bsbw	emul_extract

	/* Common exit: publish the new instruction pointer. */
4:	movl	%r3,S_PC
	popr	$(R2+R3+R4+R5+R6)
	rsb
247
/*
 * emul_extract: helper for the getval routines.
 *
 * In:	%r5 = address to read from
 *	%r6 = number of bytes wanted (8 is the current maximum)
 * Out:	%r0 (plus %r1 for a quadword) = the bytes read; byte and word
 *	reads are zero-extended to a longword
 *
 * A size above 8, or one of 3, 5, 6 and 7, runs into a 0xffff word.
 */
emul_extract:
	cmpl	%r6,$0x8
	bleq	0f			# size is within range
	.word 	0xffff			# reserved operand
0:	caseb	%r6,$0x1,$0x7		# index = size - 1
1:	.word	2f-1b			# 1: byte
	.word	3f-1b			# 2: word
	.word	9f-1b			# unknown
	.word	4f-1b			# 4: longword
	.word	9f-1b			# unknown
	.word	9f-1b			# unknown
	.word	9f-1b			# unknown
	.word	8f-1b			# 8: quadword

2:	movzbl	(%r5),%r0		# byte
	rsb

3:	movzwl	(%r5),%r0		# word
	rsb

4:	movl	(%r5),%r0		# longword
	rsb

8:	movq	(%r5),%r0		# quadword, fills %r0 and %r1
	rsb

9:	.word 	0xffff			# reserved operand
	rsb
283
/*
 * getval_dfloat: fetch a d-float operand into %r0/%r1 and step S_PC
 * past its specifier.  %r2, %r3 and %r6 are preserved.
 *
 * Short literals (modes 0-3) are expanded here: the 6-bit literal holds
 * a 3-bit exponent in bits <5:3> and a 3-bit fraction in bits <2:0>.
 * The exponent is biased by 128 and placed in bits <14:7>; the fraction
 * supplies the three most significant fraction bits, <6:4>.  Every
 * other bit of the result is zero.  All other modes are handed to
 * getval with a size of 8 bytes.
 */
getval_dfloat:
	clrq	%r0
	pushr	$(R2+R3+R6)	# use %r2+%r3 as scratch reg
	movl	S_PC,%r3		# argument address
	extzv	$4,$4,(%r3),%r2	# get mode
	caseb	%r2,$0,$0x3
0:	.word	1f-0b		# 0-3 literal
	.word	1f-0b
	.word	1f-0b
	.word	1f-0b

	/* Modes 4-15 fall out of the table: a regular 8-byte fetch. */
	movl	$0x8, %r6
	bsbw	getval
	brw	4f

1:	insv	(%r3),$4,$3,%r0	# fraction -> top fraction bits <6:4>
	extzv	$3,$3,(%r3),%r2	# get exponent
	addl2	$128,%r2		# bias the exponent
	insv	%r2,$7,$8,%r0	# insert exponent
	tstb	(%r3)+			# step over the specifier byte
	movl	%r3,S_PC
4:
	popr	$(R2+R3+R6)
	rsb
308
/*
 * getval_long, getval_word, getval_byte: fetch a 4-, 2- or 1-byte
 * operand through getval.  The value comes back in %r0 and S_PC is
 * stepped past the operand specifier.  %r1 and %r6 are preserved; %r6
 * carries the operand size into getval.
 */
getval_long:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$0x4, %r6		# longword
	brb	1f

getval_word:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$0x2, %r6		# word
	brb	1f

getval_byte:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$0x1, %r6		# byte

	/* Shared tail of the three entry points. */
1:	bsbw	getval
	popr	$(R1+R6)
	rsb
332
/*
 * getaddr_byte: work out the address of the operand whose specifier
 * S_PC points at, and step S_PC past that specifier.
 *
 * Out:	%r0 = address of the operand; for register mode this is the
 *	address of the register slot in the saved block at %fp
 *
 * %r2 and %r3 are preserved.  Handled modes are register, register
 * deferred, and byte, word and longword displacement.  When the base
 * register is PC the length of the specifier is added, since S_PC still
 * points at the start of the specifier while the address is computed.
 */
getaddr_byte:
	clrl	%r0
	pushr	$(R2+R3)	# use %r2+%r3 as scratch reg
	movl	S_PC,%r3		# argument address
	extzv	$4,$4,(%r3),%r2	# get mode
	caseb	%r2,$0,$0xf
0:	.word	2f-0b		# 0-3 literal
	.word	2f-0b
	.word	2f-0b
	.word	2f-0b
	.word	2f-0b		# 4
	.word	6f-0b		# 5 register
	.word	5f-0b		# 6 deferred
	.word	2f-0b		# 7 autodecr (missing)
	.word	2f-0b		# 8 autoincr (missing)
	.word	2f-0b		# 9 autoincr deferred (missing)
	.word	7f-0b		# 10 byte disp
	.word	2f-0b		# 11 byte disp deferred (missing)
	.word	8f-0b		# 12 word disp
	.word	2f-0b		# 13 word disp deferred (missing)
	.word	1f-0b		# 14 long disp
	.word	2f-0b		# 15 long disp deferred (missing)
#ifdef EMULATE_INKERNEL
2:	movab	3f,%r0
	movl	%r2,%r1
	brw	die		# reserved operand
3:	.asciz	"getaddr_byte: missing address mode %d\n"
#else
2:	.word	0xffff		# reserved operand
#endif

	/* 6: register deferred -- the register holds the address. */
5:	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	movl	(%fp)[%r2],%r0
	brw	4f

	/* 10: byte displacement. */
7:	extzv	$0,$4,(%r3),%r2	# get register
	incl	%r3
	movl	(%fp)[%r2],%r0		# Register contents
	cmpl	%r2,$15			# pc?
	bneq	0f			# no, skip
	addl2	$2,%r0			# compensate for displacement size
0:	cvtbl	(%r3)+,%r2		# signed displacement; %r2 is free now
	addl2	%r2,%r0
	brw	4f

	/* 12: word displacement. */
8:	extzv	$0,$4,(%r3),%r2	# get register
	incl	%r3
	movl	(%fp)[%r2],%r0		# Register contents
	cmpl	%r2,$15			# pc?
	bneq	0f			# no, skip
	addl2	$3,%r0			# compensate for displacement size
0:	cvtwl	(%r3)+,%r2		# signed displacement
	addl2	%r2,%r0
	brw	4f

	/* 14: longword displacement. */
1:	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	movl	(%fp)[%r2],%r0		# Register contents
	cmpl	%r2,$15			# pc?
	bneq	0f			# no, skip
	addl2	$5,%r0			# compensate for displacement size
0:	addl2	(%r3)+,%r0		# add displacement, step over it
	brw	4f

	/* 5: register -- hand back the address of its saved copy. */
6:	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	moval	(%fp)[%r2],%r0

4:	movl	%r3,S_PC
	popr	$(R2+R3)
	rsb
417
/*
 * polyd: polynomial calculation, d-float.
 * Uses d-float instructions, so hopefully d-float is available.
 *
 * Operands are fetched in the order argument (d-float), degree (word),
 * coefficient table address.  The result is left in the saved %r0/%r1;
 * the saved %r2, %r4 and %r5 are cleared and the saved %r3 receives the
 * address just past the last coefficient used.
 *
 * polyd MISSING:
 *	- check for bad arguments
 *	- set PSL flags
 *	- do not use d-float instructions (may be emulated)
 */
polyd:	bsbw	touser		# go back to user mode
	bsbw	getval_dfloat	# fetches argument to %r0/%r1
	movq	%r0,%r6		# %r6/%r7 = argument
	bsbw	getval_word
	movl	%r0,%r4		# %r4 = degree
	bsbw	getaddr_byte
	movl	%r0,%r3		# %r3 -> next coefficient
	clrq	%r0		# running result

	/*
	 * Horner's method: add a coefficient, and while the degree
	 * counter is not used up multiply by the argument and repeat.
	 */
	brb	1f
0:	muld2	%r6,%r0		# multiply with arg
	decl	%r4		# lower degree
1:	addd2	(%r3)+,%r0	# add constant
	tstl	%r4		# more?
	bneq	0b		# yes, go round again

	/* Store the register results into the saved block. */
	movq	%r0,S_R0
	clrl	S_R2
	movl	%r3,S_R3
	clrq	S_R4
	brw	goback
448
449
#ifdef EMULATE_INKERNEL
/*
 * die: reached when the emulation ends up somewhere it should not be.
 *
 * In:	%r0 = address of a printf format string
 *	%r1 = the single argument for that format
 *
 * Prints the message and leaves through goback.
 */
die:	pushl	%r1
	pushl	%r0
	calls	$2,_printf
	movl	%fp,%sp		# unwind to the saved register block
	brw	goback		# anything may happen
#endif
458
/* these emodd-related */
#define TMPSIZE 0x20	/* temp bytes -- be careful with this! */
#define PRECIS 0x7
#define TMPFRAC1 (%ap)
#define TMPFRAC2 32(%ap)
#define TMPFRACTGT 64(%ap)

/*
 * Extended multiply/modulus
 * XXX just EMODD for now
 *
 * Operands are decoded from the instruction stream in this order:
 * multiplier (d-float), multiplier extension (byte), multiplicand
 * (d-float), integer destination (longword), fraction destination
 * (d-float).  Each operand must be fetched exactly once, also on the
 * early exits, so that the two destinations are decoded from the right
 * place.
 *
 * Condition codes left in the saved PSL: C is always clear, V is set on
 * integer overflow, Z when the fraction result is zero and N when it is
 * negative.
 *
 * The scratch space is carved out of the stack and is thrown away by
 * goback, which reloads %sp from %fp and every register from the saved
 * block.
 */
emodd:	bsbw	touser

	/* Clear the condition codes; we will set them as needed later. */
	bicl2 $(PSL_C|PSL_V|PSL_Z|PSL_N), S_PSL

	/*
	 * We temporarily appropriate ap for the use of TMPFRAC*.
	 */
	pushl %ap
	subl2 $(3*TMPSIZE), %sp
	movl %sp, %ap

	movc5 $0x0, TMPFRAC1, $0x0, $TMPSIZE, TMPFRAC1
	movc5 $0x0, TMPFRAC2, $0x0, $TMPSIZE, TMPFRAC2
	movc5 $0x0, TMPFRACTGT, $0x0, $TMPSIZE, TMPFRACTGT

	clrl -(%sp)
	movl %sp, %r3		/* %r3 = addr of exp space (1) */
	clrl -(%sp)
	movl %sp, %r5		/* %r5 = addr of exp space (2) */
	subl2 $0x10, %sp
	movl %sp, %r6		/* %r6 = addr of allocated target space */

	/*
	 * Now we package both numbers up and call fltext_De, which
	 * will remove the exponent and sign; this will make them
	 * easier to work with.  They will be in TMPFRAC1 and
	 * TMPFRAC2 when done.
	 */
	bsbw getval_dfloat 	# multiplier into %r0 and %r1

	/*
	 * Check for sign = 0 and exp = 0; if it is, zeroexit.  Only bits
	 * <15:7> are looked at; the rest of the longword is fraction.
	 */
	bitw $0xff80, %r0
	bneq 1f
	bsbw getval_byte	# skip multiplier extension operand
	bsbw getval_dfloat	# skip multiplicand operand
	jmp zeroexit
1:

	/* Check for sign = 1 and exp = 0; if it is, do a resopflt. */
	cmpw %r0, $0x8000
	bneq 1f
	bsbw getval_byte	# get multiplier extension operand
	bsbw getval_dfloat 	# get operand into %r0 and %r1
	extzv $0, $0xff, %r0, %r0	# generate a resopflt -- XXX is this ok?
1:
	movd %r0, TMPFRACTGT
	bicl3 $0xffff7fff, %r0, %r6 # Extract the sign while we are here.
	bsbw getval_byte	# get multiplier extension operand
	movzbl %r0, -(%sp)	# arg 4: extension bits
	pushl %r3		# arg 3: exponent destination
	pushab TMPFRAC1		# arg 2: fraction destination
	movab TMPFRACTGT, -(%sp)	# arg 1: source
	calls $0x4, fltext_De

	bsbw getval_dfloat 	# multiplicand into %r0 and %r1

	/*
	 * Check for sign = 0 and exp = 0; if it is, zeroexit.  The
	 * extension byte and the multiplicand have been consumed already,
	 * so the two destination specifiers come next in the stream.
	 */
	bitw $0xff80, %r0
	bneq 1f
	jmp zeroexit
1:
	/* Check for sign = 1 and exp = 0; if it is, do a resopflt. */
	cmpw %r0, $0x8000
	bneq 1f
	extzv $0, $0xff, %r0, %r0		# generate a resopflt -- XXX is this ok?
1:

	movd %r0, TMPFRACTGT
	bicl3 $0xffff7fff, %r0, %r7 # Extract the sign while we are here.
	movzbl $0x0, -(%sp)	# no multiplier extension here
	pushl %r5
	pushab TMPFRAC2
	movab TMPFRACTGT, -(%sp)
	calls $0x4, fltext_De

	/* first, add exponents */
	addl3 (%r5), (%r3), %r9	/* %r9 = exponent (used later) */
	subl2 $0x80, %r9			/* we are excess-128 */

	/*
	 * Calculate the target sign.  Signs from multipliers are in %r6 and
	 * %r7, and both the fraction and integer parts have the same sign.
	 */
	xorl2 %r7, %r6

	/*
	 * NOTE(review): the two counts below end up in %r1 and %r2, which
	 * are given new values before anything appears to read them --
	 * confirm whether these calls are still needed.
	 */
	pushab TMPFRAC1
	calls $0x1, bitcnt
	movl %r0, %r1			/* %r1 = bitcount of TMPFRAC1 */
	pushab TMPFRAC2
	calls $0x1, bitcnt
	movl %r0, %r2			/* %r2 = bitcount of TMPFRAC2 */

	/*
	 * Now we get ready to multiply.  This multiplies a byte at a time,
	 * converting to double with CVTLD and adding partial results to
	 * TMPFRACTGT.  There is probably a faster way to do this.
	 */
	clrd TMPFRACTGT
	pushr $0x7fc			/* %r2-%r10; restored after the loop */
	subl2 $0x8, %sp			/* make some temporary space */
	movl %sp, %r1			/* %r1 -> scale factor temp */
	subl2 $0x8, %sp
	movl %sp, %r2			/* %r2 -> partial product temp */

	movl $PRECIS, %r5			/* %r5 = TMPFRAC1 byte count */
	movl $PRECIS, %r6			/* %r6 = TMPFRAC2 byte count */
	clrl %r7

1:
	movab TMPFRAC1, %r7
	addl3 %r5, %r7, %r3		/* %r3 - current byte in tmpfrac1 */
	movab TMPFRAC2, %r7
	addl3 %r6, %r7, %r4		/* %r4 - current byte in tmpfrac2 */

	movzbl (%r3), %r10
	movzbl (%r4), %r11
	mull3 %r10, %r11, %r7		/* %r7 = product of the two bytes */
	movl %r7, %r3
	cvtld %r7, (%r2)

	/* Bit weight of this partial product; negative ones are dropped. */
	subl3 %r5, $0x8, %r8
	subl3 %r6, $0x8, %r9
	addl2 %r8, %r9
	mull2 $0x8, %r9
	subl2 $0x40, %r9
	blss 9f

	/* This may be bigger than a longword.  Break it up. */
5:	cmpl %r9, $0x1e
	bleq 6f
	subl2 $0x1e, %r9

	ashl $0x1e, $0x1, %r8
	cvtld %r8, (%r1)
	muld2 (%r1), (%r2)
	jmp 5b
6:
	ashl %r9, $0x1, %r8
	cvtld %r8, (%r1)
	muld2 (%r1), (%r2)
	addd2 (%r2), TMPFRACTGT

9:
	/* Step the byte counters: %r5 runs inside, %r6 outside. */
	cmpl %r5, $0x0
	beql 2f
	decl %r5
	jmp 1b
2:	cmpl %r6, $0x0
	beql 3f
	decl %r6
	movl $PRECIS, %r5
	jmp 1b
3:

	/*
	 * At this point, %r9 might not reflect the final exponent we will use;
	 * i.e., we need post-normalization.  Luckily, we still have (in %r7)
	 * the results from the last individual multiplication handy.  Here
	 * we calculate how many bits it will take to shift the value in %r7
	 * so that bit 15 = 1.
	 */
	addl2 $0x10, %sp
	movl %r7, 0x14(%sp)	/* move %r7 onto the frame we are about to pop off */
   	popr  $0x7fc

	clrl %r3	/* %r3 = counter */
	movl %r7, %r8		/* %r8 = temp */
1:
	bicl3 $0xffff7fff, %r8, %r5
	bneq 2f
	incl %r3
	ashl $0x1, %r8, %r5
	movl %r5, %r8
	jmp 1b
2:

	/*
	 * Now we do post-normalization (by subtracting %r3) and
	 * put the exponent (in %r9) into TMPFRACTGT.
	 */
	subl2 %r3, %r9
	insv %r9, $0x7, $0x8, TMPFRACTGT

	bisl2 %r6, TMPFRACTGT	# set the sign

	/*
	 * Now we need to separate.  CVT* will not work in the case of a
	 * >32-bit integer, so we count the integer bits and use ASHQ to
	 * shift them away.
	 */
	cmpl $0x80, %r9
	blss 7f		/* exponent above 0x80: there is an integer part */
	brw 8f		/* if we are less than 1.0, we can avoid this */
7:
	subl3 $0x80, %r9, %r8		/* %r8 = number of integer bits */

	movq TMPFRACTGT, TMPFRAC1
	/*
	 * Check for integer overflow by comparing the integer bit count.
	 * If this is the case, set V in PSL.
	 */
	cmpl %r8, $0x20
	blss 3f
	bisl2 $PSL_V, S_PSL
3:
	cmpl %r8, $0x38
	blss 1f
	/*
	 * In the case where we have more than 55 bits in the integer,
	 * there are no bits left for the fraction.  Therefore we are
	 * done here;  TMPFRAC1 is equal to TMPFRACTGT and TMPFRAC2 is 0.
	 */
	movq $0f0.0, TMPFRAC2
	jmp 9f		/* we are done, move on */
1:
	/*
	 * We do the mod by using ASHQ to shift and truncate the bits.
	 * Before that happens, we have to arrange the bits in a quadword such
	 * that the significance increases from start to finish.
	 */

	movab TMPFRACTGT, %r0
	movab TMPFRAC1, %r1
	movb (%r0), 7(%r1)
	bisb2 $0x80, 7(%r1)
	movw 2(%r0), 5(%r1)
	movw 4(%r0), 3(%r1)
	movb 7(%r0), 2(%r1)
	movb 6(%r0), 1(%r1)

	/* Calculate exactly how many bits to shift. */
	subl3 %r8, $0x40, %r7
	mnegl %r7, %r6
	ashq %r6, TMPFRAC1, %r0			# shift right
	ashq %r7, %r0, TMPFRAC2			# shift left

	/* Now put it back into a D_. */
	movab TMPFRAC2, %r0
	movab TMPFRAC1, %r1
 	extv $0x18, $0x7, 4(%r0), (%r1)
	extzv $0x7, $0x9, TMPFRACTGT, %r2
	insv %r2, $0x7, $0x9, (%r1)

	movw 5(%r0), 2(%r1)
	movw 3(%r0), 4(%r1)
	movw 1(%r0), 6(%r1)

	# we have the integer in TMPFRAC1, now get the fraction in TMPFRAC2
	subd3 TMPFRAC1, TMPFRACTGT, TMPFRAC2
	jmp 9f

8:
	/*
	 * We are less than 1.0; TMPFRAC1 should be 0, and TMPFRAC2 should
	 * be equal to TMPFRACTGT.
	 */
	movd $0f0.0, TMPFRAC1
	movd TMPFRACTGT, TMPFRAC2
9:
	/*
	 * We are done. We can use CVTDL here, since EMODD is supposed to
	 * truncate.
	 */
	cvtdl TMPFRAC1, %r4
	bsbw getaddr_byte		# integer destination
	movl %r4, (%r0)

	bsbw getaddr_byte		# fraction destination
	movq TMPFRAC2, (%r0)
	movd TMPFRAC2, %r0		/* move this here so we can test it later */

	/*
	 * Clean up sp: the three temporaries plus the 0x18 bytes of
	 * exponent and target space pushed after them, then the saved ap.
	 */
	addl2 $(3*TMPSIZE+0x18), %sp
	movl (%sp)+, %ap

	/*
	 * Now set condition codes.  C is always 0 and V is set above as
	 * necessary.  Z and N follow the fraction result now in %r0.
	 */
	tstd %r0
	bneq 1f
	bisl2 $PSL_Z, S_PSL		/* fraction is exactly zero */
	brw goback
1:
	bgeq 2f /* codes are still those of the tstd; branch if N == 0 */
	bisl2 $PSL_N, S_PSL
2:
	brw goback
zeroexit:
	/* Z == 1, everything else has been cleared already */
	bisl2 $PSL_Z, S_PSL
	bsbw getaddr_byte		# integer destination
	movl $0x0, (%r0)
	bsbw getaddr_byte		# fraction destination
	movd $0f0, (%r0)
	brw goback
772
773
774
/*
 * bitcnt: counts significant bits backwards in a quadword.
 *
 * Called with calls; the one argument, 4(%ap), is the address of the
 * quadword.  The bytes are examined from offset 7 down to offset 0 and
 * the scan stops at the first byte that has a bit set.
 *
 * Returns in %r0 the number of bits, unless there are none; in that
 * case it returns $0xffffffff.
 *
 * NOTE(review): the byte offset is scaled with a shift of 5 (times 32)
 * before it is added to the in-byte count; a shift of 3 (times 8) looks
 * like what a bit count would need -- confirm the intent.
 */
bitcnt:
	.word 0xffe	/* save %r1-%r11 */

	/*
	 * Our goal is to factor a common power of 2 out of each of the
	 * two factors involved in the multiplication.  Once we have that,
	 * we can multiply them as integers.
	 * We start from the highest octet (the *least* significant at
	 * this point!) and do FFSes until we find a bit set.
	 */
	movl	4(%ap),%r0		/* %r0 = address of the quadword */
	movl	$0x8,%r1		/* %r1 = byte offset, counts down */
1:	decl	%r1
	movzbl	(%r0)[%r1],%r2		/* next byte, zero-extended */
	ffs	$0,$0x20,%r2,%r3	/* %r3 = lowest set bit, Z if none */
	bneq	2f			/* found a bit */
	tstl	%r1
	bneq	1b			/* bytes left, keep looking */

	/* All eight bytes were empty: this quadword is 0. */
	movl	$0xffffffff,%r0
	ret

2:	/*
	 * We found a bit; its position in the byte is in %r3, and %r1 is
	 * the position of the byte in the quadword.
	 */
	subl3	%r3,$0x8,%r0		/* 8 - bit position */
	ashl	$0x5,%r1,%r2		/* byte offset, scaled */
	addl2	%r2,%r0
	ret
814
815
/*
 * The fltext_X routines separate fraction and exponent* bits.
 *
 * *: exponents are left in excess-128 form
 *        D_ floating point first word:
 *         F E      7 6     0
 *        +-+--------+-------+
 * sign-> |s|exponent| fract.|  (10-3F = fraction bits)
 *        +-+--------+-------+
 *        Significance order: 0-6, 10-1F, 20-2F, 30-3F
 *
 * fltext_De is called with calls and four arguments:
 *	 4(%ap)	address of the source d-float
 *	 8(%ap)	address of the fraction destination (8 bytes are written)
 *	12(%ap)	address of the exponent destination (a longword is written)
 *	16(%ap)	the eight extra bits for use in EMOD*, et al.  If these
 *		bits are not in use, specify 0.
 *
 * The fraction is stored most significant byte first: the low fraction
 * byte of the first word with the hidden bit set in bit 7, then source
 * bytes 3, 2, 5, 4, 7 and 6, then the extension byte.
 *
 * Nothing is returned in a register; %r0-%r2, the only registers
 * modified, are saved by the entry mask and restored by ret.
 */
fltext_De:
	.word 0x007	# save %r0 %r1 %r2

	movl 0x4(%ap), %r0	# %r0 - addr of source
	movl 0x8(%ap), %r1	# %r1 - addr of fraction destination

	movb (%r0), (%r1)
	bisb2 $0x80, (%r1)+	# This is the hidden bit.

	movb 3(%r0), (%r1)+
	movb 2(%r0), (%r1)+
	movb 5(%r0), (%r1)+
	movb 4(%r0), (%r1)+
	movb 7(%r0), (%r1)+
	movb 6(%r0), (%r1)+

	/*
	 * if there are extension bits (EMOD EDIV etc.) they are
	 * low-order
	 */
	movb 0x10(%ap), (%r1)

	movl 0xc(%ap), %r2	# %r2 - addr of exponent destination
	extzv $0x7, $0x8, (%r0), (%r2)		# get exponent out
	ret
857
858