1/*	$NetBSD: unimpl_emul.S,v 1.5 2024/04/16 00:03:30 kalvisd Exp $	*/
2
3/*
4 * Copyright (c) 2001 Brandon Creighton.  All rights reserved.
5 * Copyright (c) 2000 Ludd, University of Lule}, Sweden. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31#include <machine/asm.h>
32#include "assym.h"
33
/* Only intended for debugging emulation code (security hole) */
#undef	EMULATE_INKERNEL

/*
 * Offsets of the saved user registers in the frame addressed by %fp.
 * The frame is laid out by touser: %r0-%r11, ap, fp, sp, then the PC and
 * PSL of the trapped instruction.
 */
#define	S_R0	(%fp)
#define	S_R1	4(%fp)
#define	S_R2	8(%fp)
#define	S_R3	12(%fp)
#define	S_R4	16(%fp)
#define	S_R5	20(%fp)
#define	S_R6	24(%fp)
#define	S_R7	28(%fp)
#define	S_R8	32(%fp)
#define	S_R9	36(%fp)
#define	S_R10	40(%fp)
#define	S_R11	44(%fp)
#define	S_AP	48(%fp)
#define	S_FP	52(%fp)
#define	S_SP	56(%fp)
#define	S_PC	60(%fp)
#define	S_PSL	64(%fp)

/*
 * The condition codes (low four bits of the PSL).
 *
 * Comments on these lines must be C comments: a trailing assembler
 * comment would become part of the macro body and comment out the rest
 * of any instruction the macro is used in.
 */
#define PSL_C	1
#define PSL_V	2
#define PSL_Z	4
#define PSL_N	8
#define PSL_Q	(PSL_C|PSL_V|PSL_Z|PSL_N)	/* all four */
63
/*
 * unimemu: emulation of an instruction trapped via SCB vector 0x18
 * (reserved opcode).
 *
 * Reached with the trap PC at 4(%sp).  The opcode byte at that address is
 * used to dispatch: 0x74 goes to emodd, 0x75 to polyd.  Any other opcode
 * returns to the caller with %r0 and the stack exactly as on entry.
 */
ALTENTRY(unimemu)
	pushl	%r0			# need one scratch register
	movl	8(%sp),%r0		# %r0 = address of the trapping insn
	movzbl	(%r0),%r0		# %r0 = its opcode byte
	caseb	%r0,$0x74,$1		# dispatch on opcodes 0x74..0x75
2:	.word	emodd-2b		# 0x74
	.word	polyd-2b		# 0x75

	/* Opcode not emulated here: undo the push, let the fault continue. */
	movl	(%sp)+,%r0
	rsb
77
/*
 * touser: called with bsbw as the first action of each emulation routine.
 * Switches the code back over to user mode and builds the register frame
 * that the S_* offsets refer to, with %fp pointing at the %r0 slot.
 * S_PC is stepped past the opcode byte before returning.
 *
 * Stack on entry, from (%sp) upwards: return address of the bsbw, the
 * %r0 pushed by unimemu, the return address pushed on the way into
 * unimemu, then the trap PC and PSL.
 */
#ifdef EMULATE_INKERNEL
	/* Debug variant: the frame is built in place on the current stack. */
touser:
	movl	(%sp),-52(%sp)		# park our return address below the new frame
	movl	4(%sp),%r0		# get the caller's %r0 back
	addl2	$12,%sp			# discard the three longwords above
	pushr	$0x7fff			# %r0 through %sp go into the frame
	movl	%sp,%fp			# %fp = start of frame
	tstl	-(%sp)			# step down onto the parked return address
	incl	S_PC			# skip the opcode byte
	rsb
#else
touser:
	mfpr	$PR_USP,%r0		# %r0 = user stack pointer
	movl	4(%sp),-68(%r0)		# saved %r0 goes into the S_R0 slot
	movl	(%sp),-72(%r0)		# our return address, just below the frame
	movq	12(%sp),-8(%r0)		# trap PC + PSL go into S_PC/S_PSL
	addl2	$12,%sp			# drop the fields already copied out
	movl	$2f,(%sp)		# replace the trap PC so rei lands on 2:
	rei
2:	subl2	$8,%sp			# PC + PSL are already in place
	pushr	$0x7ffe			# %r1 through %sp; %r0 is already in place
	subl2	$8,%sp			# step over the %r0 slot and return address
	movab	4(%sp),%fp		# %fp = address of the %r0 slot
	incl	S_PC			# skip the opcode byte
	rsb
#endif
106
/*
 * goback: common exit of the emulation routines.
 * Drops anything still stacked below the register frame, reloads every
 * register from the frame and resumes through the PC/PSL pair that
 * follows it.
 */
goback:
	movl	%fp,%sp			# whatever is below the frame is discarded
	popr	$0x7fff			# %r0 through %sp from the frame
	rei
113
/*
 * getval: is used by the getval_* functions.  Gets the value specified by the
 * current operand specifier pointed to by S_PC.  It also increments S_PC.
 *
 * In:	S_PC	address of the operand specifier
 *	%r6	operand size in bytes (see emul_extract)
 * Out:	%r0/%r1	operand value (both are cleared first)
 *	S_PC	address of the byte following the specifier
 * %r2-%r6 are preserved.
 *
 * Only literal, register, register deferred, autoincrement (including
 * immediate) and byte/longword displacement modes are implemented; the
 * other modes end up at the 0xffff word below.
 */
getval:
	clrq	%r0
	pushr	$(R2+R3+R4+R5+R6)
	movl	S_PC,%r3		# argument address
	extzv	$4,$4,(%r3),%r2	# get mode
	caseb	%r2,$0,$0xf
0:	.word	getval_literal-0b		# 0-3 literal
	.word	getval_literal-0b
	.word	getval_literal-0b
	.word	getval_literal-0b
	.word	2f-0b		# 4 indexed
	.word	getval_reg-0b			# 5 register
	.word	getval_regdefer-0b		# 6 register deferred
	.word	2f-0b		# 7 autodecrement
	.word	getval_ai-0b			# 8 autoincrement
	.word	2f-0b		# 9 autoincrement deferred
	.word	getval_bytedis-0b		# A byte displacement
	.word	2f-0b		# B byte displacement deferred
	.word	2f-0b		# C word displacement
	.word	2f-0b		# D word displacement deferred
	.word	getval_longdis-0b		# E longword displacement
	.word	2f-0b		# F longword displacement deferred
#ifdef EMULATE_INKERNEL
2:	movab	0f,%r0
	movl	%r2,%r1
	brw	die
0:	.asciz	"getval: missing address mode %d\n"
#else
2:	.word 	0xffff		# reserved operand
#endif

	/*
	 * 0x00-0x03
	 * Literal mode.  Note:  getval_{d,f}float will *never* use this routine
	 * to get literal values, since they treat them differently (see those routines
	 * for details).
	 */
getval_literal:
	movzbl	(%r3)+,%r0	# correct operand
	brw 4f

	/*
	 * 0x05
	 * Register mode.  Grab the register number, yank the value out of the
	 * register frame.
	 */
getval_reg:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	ashl	$2,%r2,%r2		# register number -> frame offset
	addl3	%fp,%r2,%r5		# %r5 = address of the frame slot
	bsbw	emul_extract
	brw		4f

	/*
	 * 0x06
	 * Register deferred mode.  Grab the register number, yank the value out,
	 * use that as the address to get the real value.
	 */
getval_regdefer:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3
	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	movl	(%r2),%r5		# %r5 = register contents = operand address
	bsbw	emul_extract
	brw		4f

	/*
	 * 0x08 Autoincrement mode
	 * Get the value in the register, use that as the address of our target,
	 * then increment the register.
	 */
getval_ai:
	extzv	$0,$4,(%r3),%r2	# Get reg number
	incl	%r3

	/*
	 * In the case of the register being PC (0xf), this is called immediate mode;
	 * we can treat it the same as any other register, as long as we keep %r3
	 * and S_PC in sync.  We do that here.
	 */
	movl 	%r3,S_PC

	ashl	$2,%r2,%r2
	addl2	%fp,%r2
	movl	(%r2),%r5
	bsbw	emul_extract
	addl2	%r6,(%r2)		# bump the register by the operand size

	movl	S_PC,%r3		/* if PC did change, S_PC was changed too */
	brw		4f

	/*
	 * 0xA
	 * Byte displacement mode.  The displacement is a signed byte.  When
	 * the base register is PC, S_PC still holds the address of the
	 * specifier byte, so the two bytes of specifier + displacement are
	 * added to get the PC value the displacement is relative to (same
	 * compensation as in getaddr_byte).
	 */
getval_bytedis:
	extzv	$0, $4, (%r3), %r2	# get register
	incl	%r3
	movl	(%fp)[%r2],%r5		# base register contents
	cvtbl	(%r3)+,%r4		# sign-extended displacement
	addl2	%r4, %r5
	cmpl	%r2,$15			# pc?
	bneq	1f			# no, skip
	addl2	$2,%r5			# compensate for specifier + displacement
1:	bsbw	emul_extract
	brw		4f

	/*
	 * 0xE
	 * Longword displacement mode.  PC as base register is compensated
	 * for the five bytes of specifier + displacement, as above.
	 */
getval_longdis:
	extzv	$0, $4, (%r3), %r2	# get register
	incl	%r3
	movl	(%fp)[%r2],%r5		# base register contents
	movl	(%r3)+,%r4
	addl2	%r4, %r5
	cmpl	%r2,$15			# pc?
	bneq	1f			# no, skip
	addl2	$5,%r5			# compensate for specifier + displacement
1:	bsbw	emul_extract

4:	movl	%r3,S_PC
	popr	$(R2+R3+R4+R5+R6)
	rsb
243
/*
 * emul_extract: helper of the getval functions.
 *
 * In:	%r5	address to read from
 *	%r6	number of bytes to read; 1, 2, 4 and 8 are implemented
 * Out:	%r0	the value (zero-extended for 1 and 2 bytes)
 *	%r1	upper longword, written only for the 8-byte case
 *
 * A size above 8, or one of 3, 5, 6, 7, runs into a 0xffff word.
 */
emul_extract:
	cmpl	%r6,$0x8
	bleq	3f
	.word 	0xffff		# reserved operand
3:	caseb	%r6,$0x1,$0x7
0:	.word	1f-0b			# 1: byte
	.word	2f-0b			# 2: word
	.word	9f-0b			# unknown
	.word	4f-0b			# 4: longword
	.word	9f-0b			# unknown
	.word	9f-0b			# unknown
	.word	9f-0b			# unknown
	.word	8f-0b			# 8: quadword

	/* The byte case must stay first: caseb falls through to here. */
1:	movzbl	(%r5),%r0
	rsb

8:	movq	(%r5),%r0
	rsb

4:	movl	(%r5),%r0
	rsb

2:	movzwl	(%r5),%r0
	rsb

9:	.word 	0xffff		# reserved operand
	rsb
279
/*
 * getval_dfloat: fetch a D_floating source operand into %r0/%r1 and
 * advance S_PC past its specifier.  %r2, %r3 and %r6 are preserved.
 *
 * Short literals (modes 0-3) are expanded here instead of in getval.  The
 * 6-bit literal holds a 3-bit exponent in bits 5:3 and a 3-bit fraction
 * in bits 2:0.  In the first word of the expanded value the exponent
 * (biased by 128) occupies bits 14:7 and the literal fraction supplies
 * the three most significant fraction bits, i.e. bits 6:4.
 * Every other mode is handed to getval with a size of 8 bytes.
 */
getval_dfloat:
	clrq	%r0
	pushr	$(R2+R3+R6)	# use %r2+%r3 as scratch reg
	movl	S_PC,%r3		# argument address
	extzv	$4,$4,(%r3),%r2	# get mode
	caseb	%r2,$0,$0x3
0:	.word	1f-0b		# 0-3 literal
	.word	1f-0b
	.word	1f-0b
	.word	1f-0b

	movl	$0x8, %r6
	bsbw	getval
	brw	4f

1:	insv	(%r3),$4,$3,%r0	# insert fraction (top three fraction bits)
	extzv	$3,$3,(%r3),%r2	# get exponent
	addl2	$128,%r2		# bias the exponent
	insv	%r2,$7,$8,%r0	# insert exponent
	tstb	(%r3)+			# step over the literal byte
	movl	%r3,S_PC
4:
	popr	$(R2+R3+R6)
	rsb
304
/*
 * getval_long: fetch a longword source operand into %r0 and advance S_PC.
 * %r1 and %r6 are saved around the call to getval.
 */
getval_long:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$4,%r6			# operand size for getval
	bsbw	getval
	popr	$(R1+R6)
	rsb
312
/*
 * getval_word: fetch a word source operand, zero-extended, into %r0 and
 * advance S_PC.  %r1 and %r6 are saved around the call to getval.
 */
getval_word:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$2,%r6			# operand size for getval
	bsbw	getval
	popr	$(R1+R6)
	rsb
320
/*
 * getval_byte: fetch a byte source operand, zero-extended, into %r0 and
 * advance S_PC.  %r1 and %r6 are saved around the call to getval.
 */
getval_byte:
	clrl	%r0
	pushr	$(R1+R6)
	movl	$1,%r6			# operand size for getval
	bsbw	getval
	popr	$(R1+R6)
	rsb
328
/*
 * getaddr_byte: decode the operand specifier at S_PC and return the
 * address of the operand in %r0.  S_PC is advanced past the specifier.
 * %r2 and %r3 are preserved.
 *
 * For register mode the address returned is that of the register's slot
 * in the frame at %fp.  With PC as the base register of a displacement
 * mode, the length of specifier + displacement is added, because S_PC
 * still holds the address of the specifier while it is being decoded.
 *
 * Implemented: register, register deferred, byte/word/longword
 * displacement.  All other modes end up at the 0xffff word below.
 */
getaddr_byte:
	clrl	%r0
	pushr	$(R2+R3)	# use %r2+%r3 as scratch reg
	movl	S_PC,%r3		# argument address
	extzv	$4,$4,(%r3),%r2	# get mode
	caseb	%r2,$0,$0xf
0:	.word	2f-0b		# 0-3 literal
	.word	2f-0b
	.word	2f-0b
	.word	2f-0b
	.word	2f-0b		# 4
	.word	6f-0b		# 5 register
	.word	5f-0b		# 6 deferred
	.word	2f-0b		# 7 autodecr (missing)
	.word	2f-0b		# 8 autoincr (missing)
	.word	2f-0b		# 9 autoincr deferred (missing)
	.word	7f-0b		# 10 byte disp
	.word	2f-0b		# 11 byte disp deferred (missing)
	.word	8f-0b		# 12 word disp
	.word	2f-0b		# 13 word disp deferred (missing)
	.word	1f-0b		# 14 long disp
	.word	2f-0b		# 15 long disp deferred (missing)
#ifdef EMULATE_INKERNEL
2:	movab	3f,%r0
	movl	%r2,%r1
	brw	die		# reserved operand
3:	.asciz	"getaddr_byte: missing address mode %d\n"
#else
2:	.word	0xffff		# reserved operand
#endif

	/* 14: longword displacement */
1:	extzv	$0,$4,(%r3),%r2		# register number
	incl	%r3
	movl	(%fp)[%r2],%r0		# base = register contents
	addl2	(%r3)+,%r0		# plus displacement; %r3 steps over it
	cmpl	%r2,$15			# PC as base?
	bneq	0f
	addl2	$5,%r0			# specifier + 4 displacement bytes
0:	brw	4f

	/* 6: register deferred */
5:	extzv	$0,$4,(%r3),%r2		# register number
	incl	%r3
	movl	(%fp)[%r2],%r0		# the register holds the address
	brw	4f

	/* 10: byte displacement */
7:	extzv	$0,$4,(%r3),%r2		# register number
	incl	%r3
	movl	(%fp)[%r2],%r0		# base = register contents
	cvtbl	(%r3)+,-(%sp)		# sign-extend displacement via the stack
	addl2	(%sp)+,%r0
	cmpl	%r2,$15			# PC as base?
	bneq	0f
	addl2	$2,%r0			# specifier + 1 displacement byte
0:	brw	4f

	/* 12: word displacement */
8:	extzv	$0,$4,(%r3),%r2		# register number
	incl	%r3
	movl	(%fp)[%r2],%r0		# base = register contents
	cvtwl	(%r3)+,-(%sp)		# sign-extend displacement via the stack
	addl2	(%sp)+,%r0
	cmpl	%r2,$15			# PC as base?
	bneq	0f
	addl2	$3,%r0			# specifier + 2 displacement bytes
0:	brw	4f

	/* 5: register; falls through into the common exit */
6:	extzv	$0,$4,(%r3),%r2		# register number
	incl	%r3
	moval	(%fp)[%r2],%r0		# address of the frame slot itself

4:	movl	%r3,S_PC
	popr	$(R2+R3)
	rsb
413
/*
 * polyd: polynomial calculation, d-float.
 * Uses d-float instructions, so hopefully d-float is available.
 *
 * Operands are decoded in this order: argument (d-float), degree (word),
 * address of the coefficient table.  The table is consumed front to back
 * and the result is accumulated as  result = result * arg + coefficient.
 *
 * Register frame on exit: %r0/%r1 = result, %r2 = 0, %r3 = address just
 * past the last coefficient read, %r4 = %r5 = 0.
 *
 * polyd MISSING:
 *	- check for bad arguments
 *	- set PSL flags
 *	- do not use d-float instructions (may be emulated)
 */
polyd:
	bsbw	touser			# go back to user mode
	bsbw	getval_dfloat		# %r0/%r1 = argument
	movq	%r0,%r6			# keep it in %r6/%r7
	bsbw	getval_word		# %r0 = degree
	movl	%r0,%r4
	bsbw	getaddr_byte		# %r0 = coefficient table address
	movl	%r0,%r3
	clrq	%r0			# running result

	/* degree + 1 additions with one multiplication between each pair */
	brb	2f
1:	muld2	%r6,%r0			# multiply with arg
	decl	%r4			# lower degree
2:	addd2	(%r3)+,%r0		# add next coefficient
	tstl	%r4			# more?
	bneq	1b

	movl	%r3,S_R3
	clrq	S_R4
	clrl	S_R2
	movq	%r0,S_R0
	brw	goback
444
445
#ifdef EMULATE_INKERNEL
/*
 * die: when we end up somewhere we do not want (debug build only).
 * In: %r0 = address of a printf format string, %r1 = its one argument.
 * Prints the message, then leaves through goback.
 */
die:	pushl	%r1
	pushl	%r0
	calls	$2,_printf
	movl	%fp,%sp		# back to the register frame
	brw	goback		# anything may happen
#endif
454
/* these are emodd-related */
#define TMPSIZE 0x20	/* temp bytes -- be careful with this! */
#define PRECIS 0x7
#define TMPFRAC1 (%ap)
#define TMPFRAC2 32(%ap)
#define TMPFRACTGT 64(%ap)
/*
 * Bytes emodd allocates below the saved %ap: the three TMPFRAC* areas,
 * two exponent longwords and 0x10 bytes of target space.
 */
#define EMODD_LOCALS (3*TMPSIZE + 4 + 4 + 0x10)

/*
 * Extended multiply/modulus
 * XXX just EMODD for now
 *
 * Operands are decoded in this order: multiplier (d-float), multiplier
 * extension (byte), multiplicand (d-float), address of the longword
 * integer result, address of the d-float fraction result.
 *
 * Condition codes in S_PSL: C is always cleared, V is set when the integer
 * part needs 32 or more bits, Z is set when the fraction result is zero,
 * N when it is negative.
 */
emodd:	bsbw	touser

	/* Clear the condition codes; we will set them as needed later. */
	bicl2 $(PSL_C|PSL_V|PSL_Z|PSL_N), S_PSL

	/*
	 * We temporarily appropriate ap for the use of TMPFRAC*.
	 */
	pushl %ap
	subl2 $(3*TMPSIZE), %sp
	movl %sp, %ap

	/* Zero-fill the three scratch areas. */
	movc5 $0x0, TMPFRAC1, $0x0, $TMPSIZE, TMPFRAC1
	movc5 $0x0, TMPFRAC2, $0x0, $TMPSIZE, TMPFRAC2
	movc5 $0x0, TMPFRACTGT, $0x0, $TMPSIZE, TMPFRACTGT

	clrl -(%sp)
	movl %sp, %r3		/* %r3 = addr of exp space (1) */
	clrl -(%sp)
	movl %sp, %r5		/* %r5 = addr of exp space (2) */
	subl2 $0x10, %sp
	movl %sp, %r6		/* %r6 = addr of allocated target space */

	/*
	 * Now we package both numbers up and call fltext_De, which
	 * will remove the exponent and sign; this will make them
	 * easier to work with.  They will be in TMPFRAC1 and
	 * TMPFRAC2 when done.
	 */
	bsbw getval_dfloat 	# get operand into %r0 and %r1

	/*
	 * Check for sign = 0 and exp = 0; if it is, zeroexit.  Only the sign
	 * and exponent bits of the first word take part in the test; the
	 * fraction bits are masked off.
	 */
	bicw3 $0x7f, %r0, %r4	/* %r4<15:0> = sign + exponent */
	bneq 1f
	bsbw getval_byte	# skip multiplier extension operand
	bsbw getval_dfloat	# skip multiplicand operand
	jmp zeroexit
1:

	/* Check for sign = 1 and exp = 0; if it is, do a resopflt. */
	cmpw %r4, $0x8000
	bneq 1f
	bsbw getval_byte	# get multiplier extension operand
	bsbw getval_dfloat 	# get operand into %r0 and %r1
	extzv $0, $0xff, %r0, %r0	# generate a resopflt -- XXX is this ok?
1:
	movd %r0, TMPFRACTGT
	bicl3 $0xffff7fff, %r0, %r6 # Extract the sign while we are here.
	bsbw getval_byte	# get multiplier extension operand
	/*
	 * Arguments for fltext_De, pushed last to first: extension bits,
	 * exponent destination, fraction destination, source.
	 */
	movzbl %r0, -(%sp)
	pushl %r3
	pushab TMPFRAC1
	movab TMPFRACTGT, -(%sp)
	calls $0x4, fltext_De

	bsbw getval_dfloat 	# get operand into %r0 and %r1

	/*
	 * Check for sign = 0 and exp = 0; if it is, zeroexit.  All three
	 * source operands have been consumed at this point, so S_PC already
	 * addresses the first destination specifier; nothing is skipped.
	 */
	bicw3 $0x7f, %r0, %r4	/* %r4<15:0> = sign + exponent */
	bneq 1f
	jmp zeroexit
1:
	/* Check for sign = 1 and exp = 0; if it is, do a resopflt. */
	cmpw %r4, $0x8000
	bneq 1f
	extzv $0, $0xff, %r0, %r0		# generate a resopflt -- XXX is this ok?
1:

	movd %r0, TMPFRACTGT
	bicl3 $0xffff7fff, %r0, %r7 # Extract the sign while we are here.
	movzbl $0x0, -(%sp)	# no multiplier extension here
	pushl %r5
	pushab TMPFRAC2
	movab TMPFRACTGT, -(%sp)
	calls $0x4, fltext_De

	/* first, add exponents */
	addl3 (%r5), (%r3), %r9	/* %r9 = exponent (used later) */
	subl2 $0x80, %r9			/* we are excess-128 */

	/*
	 * Let's calculate the target sign.  Signs from multipliers are in %r6 and
	 * %r7, and both the fraction and integer parts have the same sign.
	 */
	xorl2 %r7, %r6

	/*
	 * NOTE(review): the two bit counts below are not read again in the
	 * code visible here (%r1 and %r2 are reloaded before the multiply
	 * loop) -- confirm whether these calls are still needed.
	 */
	pushab TMPFRAC1
	calls $0x1, bitcnt
	movl %r0, %r1			/* %r1 = bitcount of TMPFRAC1 */
	pushab TMPFRAC2
	calls $0x1, bitcnt
	movl %r0, %r2			/* %r2 = bitcount of TMPFRAC2 */

	/*
	 * Now we get ready to multiply.  This multiplies a byte at a time,
	 * converting to double with CVTLD and adding partial results to
	 * TMPFRACTGT.  There's probably a faster way to do this.
	 */
	clrd TMPFRACTGT
	pushr $0x7fc			/* %r2-%r10; sign (%r6) and exponent (%r9) survive */
	subl2 $0x8, %sp			/* make some temporary space */
	movl %sp, %r1			/* (%r1) = d-float scale factor */
	subl2 $0x8, %sp
	movl %sp, %r2			/* (%r2) = d-float partial product */

	movl $PRECIS, %r5			/* %r5 = TMPFRAC1 byte count */
	movl $PRECIS, %r6			/* %r6 = TMPFRAC2 byte count */
	clrl %r7

1:
	movab TMPFRAC1, %r7
	addl3 %r5, %r7, %r3		/* %r3 - current byte in tmpfrac1 */
	movab TMPFRAC2, %r7
	addl3 %r6, %r7, %r4		/* %r4 - current byte in tmpfrac2 */

	movzbl (%r3), %r10
	movzbl (%r4), %r11
	mull3 %r10, %r11, %r7
	movl %r7, %r3
	cvtld %r7, (%r2)

	/* %r9 = ((8 - %r5) + (8 - %r6)) * 8 - 0x40: shift for this product */
	subl3 %r5, $0x8, %r8
	subl3 %r6, $0x8, %r9
	addl2 %r8, %r9
	mull2 $0x8, %r9
	subl2 $0x40, %r9
	blss 9f				/* negative shift: product is dropped */

	/* This may be bigger than a longword.  Break it up. */
5:	cmpl %r9, $0x1e
	bleq 6f
	subl2 $0x1e, %r9

	ashl $0x1e, $0x1, %r8
	cvtld %r8, (%r1)
	muld2 (%r1), (%r2)
	jmp 5b
6:
	ashl %r9, $0x1, %r8
	cvtld %r8, (%r1)
	muld2 (%r1), (%r2)
	addd2 (%r2), TMPFRACTGT

9:
	/* Next byte pair: %r5 runs PRECIS..0 inside each step of %r6. */
	cmpl %r5, $0x0
	beql 2f
	decl %r5
	jmp 1b
2:	cmpl %r6, $0x0
	beql 3f
	decl %r6
	movl $PRECIS, %r5
	jmp 1b
3:

	/*
	 * At this point, %r9 might not reflect the final exponent we will use;
	 * i.e., we need post-normalization.  Luckily, we still have (in %r7)
	 * the results from the last individual multiplication handy.  Here
	 * we calculate how many bits it will take to shift the value in %r7
	 * so that bit 15 = 1.
	 */
	addl2 $0x10, %sp
	movl %r7, 0x14(%sp)	/* move %r7 onto the frame we're about to pop off */
	popr  $0x7fc

	clrl %r3	/* %r3 = counter */
	movl %r7, %r8		/* %r8 = temp */
1:
	bicl3 $0xffff7fff, %r8, %r5
	bneq 2f
	incl %r3
	ashl $0x1, %r8, %r5
	movl %r5, %r8
	jmp 1b
2:

	/*
	 * Now we do post-normalization (by subtracting %r3) and
	 * put the exponent (in %r9) into TMPFRACTGT.
	 */
	subl2 %r3, %r9
	insv %r9, $0x7, $0x8, TMPFRACTGT

	bisl2 %r6, TMPFRACTGT	# set the sign

	/*
	 * Now we need to separate.  CVT* won't work in the case of a
	 * >32-bit integer, so we count the integer bits and use ASHQ to
	 * shift them away.
	 */
	cmpl $0x80, %r9
	blss 7f		/* exponent above 0x80: there is an integer part */
	brw 8f		/* if we are less than 1.0, we can avoid this */
7:
	subl3 $0x80, %r9, %r8	/* %r8 = number of integer bits */

	movq TMPFRACTGT, TMPFRAC1
	/*
	 * Check for integer overflow by comparing the integer bit count.
	 * If this is the case, set V in PSL.
	 */
	cmpl %r8, $0x20
	blss 3f
	bisl2 $PSL_V, S_PSL
3:
	cmpl %r8, $0x38
	blss 1f
	/*
	 * In the case where we have more than 55 bits in the integer,
	 * there aren't any bits left for the fraction.  Therefore we're
	 * done here;  TMPFRAC1 is equal to TMPFRACTGT and TMPFRAC2 is 0.
	 */
	movq $0d0.0, TMPFRAC2
	jmp 9f		/* we're done, move on */
1:
	/*
	 * We do the mod by using ASHQ to shift and truncate the bits.
	 * Before that happens, we have to arrange the bits in a quadword such
	 * that the significance increases from start to finish.
	 */

	movab TMPFRACTGT, %r0
	movab TMPFRAC1, %r1
	movb (%r0), 7(%r1)
	bisb2 $0x80, 7(%r1)
	movw 2(%r0), 5(%r1)
	movw 4(%r0), 3(%r1)
	movb 7(%r0), 2(%r1)
	movb 6(%r0), 1(%r1)

	/* Calculate exactly how many bits to shift. */
	subl3 %r8, $0x40, %r7
	mnegl %r7, %r6
	ashq %r6, TMPFRAC1, %r0			# shift right
	ashq %r7, %r0, TMPFRAC2			# shift left

	/* Now put it back into a D_. */
	movab TMPFRAC2, %r0
	movab TMPFRAC1, %r1
	extv $0x18, $0x7, 4(%r0), (%r1)
	extzv $0x7, $0x9, TMPFRACTGT, %r2
	insv %r2, $0x7, $0x9, (%r1)

	movw 5(%r0), 2(%r1)
	movw 3(%r0), 4(%r1)
	movw 1(%r0), 6(%r1)

	# we have the integer in TMPFRAC1, now get the fraction in TMPFRAC2
	subd3 TMPFRAC1, TMPFRACTGT, TMPFRAC2
	jmp 9f

8:
	/*
	 * We are less than 1.0; TMPFRAC1 should be 0, and TMPFRAC2 should
	 * be equal to TMPFRACTGT.
	 */
	movd $0d0.0, TMPFRAC1
	movd TMPFRACTGT, TMPFRAC2
9:
	/*
	 * We're done. We can use CVTDL here, since EMODD is supposed to
	 * truncate.
	 */
	cvtdl TMPFRAC1, %r4
	bsbw getaddr_byte
	movl %r4, (%r0)		/* store the integer result */

	bsbw getaddr_byte
	movq TMPFRAC2, (%r0)	/* store the fraction result */
	movd TMPFRAC2, %r0		/* move this here so we can test it later */

	/* Clean up sp: release everything allocated below the saved ap. */

	addl2 $EMODD_LOCALS, %sp
	movl (%sp)+, %ap

	/*
	 * Now set condition codes.  C is always 0 and V is set above as
	 * necessary.  Z and N follow the fraction result, which is in
	 * %r0/%r1: zero sets Z, negative sets N.
	 */
	tstd %r0
	bgtr 1f			/* positive: neither Z nor N */
	blss 2f
	bisl2 $PSL_Z, S_PSL
	brb 1f
2:	bisl2 $PSL_N, S_PSL
1:
	brw goback
zeroexit:
	/*
	 * One of the multiplicands is zero: both results are zero.
	 * Z == 1, everything else has been cleared already
	 */
	bisl2 $PSL_Z, S_PSL
	bsbw getaddr_byte
	movl $0x0, (%r0)
	bsbw getaddr_byte
	movd $0d0, (%r0)
	brw goback
769
770
/*
 * bitcnt: counts significant bits backwards in a quadword.
 *
 * Called with calls; 4(%ap) is the address of the eight bytes to scan.
 * The bytes are examined from offset 7 down to offset 0 and the scan
 * stops at the first byte that is not zero.
 *
 * Returns in %r0:
 *	0xffffffff			when all eight bytes are zero
 *	(8 - bit) + (offset << 5)	otherwise, where "offset" is the
 *					offset of that byte and "bit" the
 *					position of its lowest set bit
 *
 * NOTE(review): the byte offset is shifted by 5 (times 32), not by 3
 * (times 8) -- confirm that this is the intended weighting.
 */
bitcnt:
	.word 0xffe	/* save %r1-%r11 */

	movl	4(%ap),%r0		# %r0 = address of the quadword
	movl	$0x8,%r1		# %r1 = byte offset, counts down
1:	decl	%r1
	movzbl	(%r0)[%r1],%r2		# %r2 = byte at that offset
	ffs	$0,$0x20,%r2,%r3	# %r3 = lowest set bit; Z set if none
	bneq	2f			# found one
	tstl	%r1
	bneq	1b			# more bytes to look at

	/* this quadword is 0 */
	mnegl	$1,%r0
	ret

2:	/*
	 * We found a bit; its position in the byte is in %r3, and %r1 is the
	 * position of the byte in the quadword.
	 */
	subl3	%r3,$0x8,%r0
	ashl	$0x5,%r1,%r2
	addl2	%r2,%r0
	ret
810
811
/*
 * The fltext_X routines separate fraction and exponent* bits.
 * Nothing is returned in a register: the entry mask below makes the
 * return restore %r0-%r4 to their values at the time of the call.
 *
 * *: exponents are left in excess-128 form
 *        D_ floating point first word:
 *         F E      7 6     0
 *        +-+--------+-------+
 * sign-> |s|exponent| fract.|  (10-3F = fraction bits)
 *        +-+--------+-------+
 *        Significance order: 0-6, 10-1F, 20-2F, 30-3F
 *
 * fltext_De is entered with calls and takes four longword arguments:
 *	 4(%ap)	address of the D_ source value
 *	 8(%ap)	address of the fraction destination (8 bytes are written)
 *	12(%ap)	address of the exponent destination (one longword)
 *	16(%ap)	the eight extra bits for use in EMOD*, et al.  If these
 *		bits are not in use, specify 0.
 *
 * The fraction is stored most significant byte first, with the hidden
 * bit made explicit in the top bit of byte 0; the extension bits become
 * byte 7.
 */
fltext_De:
	.word 0x01f	# save %r0 %r1 %r2 %r3 %r4 (no return value)

	movl 0x4(%ap), %r0	# %r0 - addr of source
	movl 0x8(%ap), %r1	# %r1 - addr of fraction destination

	movb (%r0), (%r1)	# fraction bits 0-6 plus one exponent bit
	bisb2 $0x80, (%r1)+	# This is the hidden bit.

	movb 3(%r0), (%r1)+	# remaining fraction bytes, in descending
	movb 2(%r0), (%r1)+	# order of significance
	movb 5(%r0), (%r1)+
	movb 4(%r0), (%r1)+
	movb 7(%r0), (%r1)+
	movb 6(%r0), (%r1)+

	/*
	 * if there are extension bits (EMOD EDIV etc.) they are
	 * low-order
	 */
	movb 0x10(%ap), (%r1)

	movl 0x4(%ap), %r0	# %r0 - addr of source
	movl 0xc(%ap), %r2	# %r2 - addr of exponent destination
	extzv $0x7, $0x8, (%r0), (%r2)		# get exponent out
	ret
853
854