1/* SPDX-License-Identifier: GPL-2.0 */
2	.file	"wm_sqrt.S"
3/*---------------------------------------------------------------------------+
4 |  wm_sqrt.S                                                                |
5 |                                                                           |
6 | Fixed point arithmetic square root evaluation.                            |
7 |                                                                           |
8 | Copyright (C) 1992,1993,1995,1997                                         |
9 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
10 |                       Australia.  E-mail billm@suburbia.net               |
11 |                                                                           |
12 | Call from C as:                                                           |
13 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
14 |                                                                           |
15 +---------------------------------------------------------------------------*/
16
17/*---------------------------------------------------------------------------+
18 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
19 |    returns the square root of n in n.                                     |
20 |                                                                           |
21 |  Use Newton's method to compute the square root of a number, which must   |
22 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
23 |  Does not check the sign or tag of the argument.                          |
24 |  Sets the exponent, but not the sign or tag of the result.                |
25 |                                                                           |
26 |  The guess is kept in %esi:%edi                                           |
27 +---------------------------------------------------------------------------*/
28
29#include "exception.h"
30#include "fpu_emu.h"
31
32
33#ifndef NON_REENTRANT_FPU
34/*	Local storage on the stack: */
35#define FPU_accum_3	-4(%ebp)	/* ms word */
36#define FPU_accum_2	-8(%ebp)
37#define FPU_accum_1	-12(%ebp)
38#define FPU_accum_0	-16(%ebp)
39
40/*
41 * The de-normalised argument:
42 *                  sq_2                  sq_1              sq_0
43 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
44 *           ^ binary point here
45 */
46#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
47#define FPU_fsqrt_arg_1	-24(%ebp)
48#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
49
50#else
51/*	Local storage in a static area: */
52.data
53	.align 4,0
54FPU_accum_3:
55	.long	0		/* ms word */
56FPU_accum_2:
57	.long	0
58FPU_accum_1:
59	.long	0
60FPU_accum_0:
61	.long	0
62
63/* The de-normalised argument:
64                    sq_2                  sq_1              sq_0
65          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
66             ^ binary point here
67 */
68FPU_fsqrt_arg_2:
69	.long	0		/* ms word */
70FPU_fsqrt_arg_1:
71	.long	0
72FPU_fsqrt_arg_0:
73	.long	0		/* ls word, at most the ms bit is set */
74#endif /* NON_REENTRANT_FPU */
75
76
77.text
78SYM_FUNC_START(wm_sqrt)
79	pushl	%ebp
80	movl	%esp,%ebp
81#ifndef NON_REENTRANT_FPU
82	subl	$28,%esp
83#endif /* NON_REENTRANT_FPU */
84	pushl	%esi
85	pushl	%edi
86	pushl	%ebx
87
88	movl	PARAM1,%esi
89
90	movl	SIGH(%esi),%eax
91	movl	SIGL(%esi),%ecx
92	xorl	%edx,%edx
93
94/* We use a rough linear estimate for the first guess.. */
95
96	cmpw	EXP_BIAS,EXP(%esi)
97	jnz	sqrt_arg_ge_2
98
99	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
100	rcrl	$1,%ecx
101	rcrl	$1,%edx
102
103sqrt_arg_ge_2:
104/* From here on, n is never accessed directly again until it is
105   replaced by the answer. */
106
107	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
108	movl	%ecx,FPU_fsqrt_arg_1
109	movl	%edx,FPU_fsqrt_arg_0
110
111/* Make a linear first estimate */
112	shrl	$1,%eax
113	addl	$0x40000000,%eax
114	movl	$0xaaaaaaaa,%ecx
115	mull	%ecx
116	shll	%edx			/* max result was 7fff... */
117	testl	$0x80000000,%edx	/* but min was 3fff... */
118	jnz	sqrt_prelim_no_adjust
119
120	movl	$0x80000000,%edx	/* round up */
121
122sqrt_prelim_no_adjust:
123	movl	%edx,%esi	/* Our first guess */
124
125/* We have now computed (approx)   (2 + x) / 3, which forms the basis
126   for a few iterations of Newton's method */
127
128	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
129
130/*
131 * From our initial estimate, three iterations are enough to get us
132 * to 30 bits or so. This will then allow two iterations at better
133 * precision to complete the process.
134 */
135
136/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
137	shrl	%ecx		/* Doing this first will prevent a divide */
138				/* overflow later. */
139
140	movl	%ecx,%edx	/* msw of the arg / 2 */
141	divl	%esi		/* current estimate */
142	shrl	%esi		/* divide by 2 */
143	addl	%eax,%esi	/* the new estimate */
144
145	movl	%ecx,%edx
146	divl	%esi
147	shrl	%esi
148	addl	%eax,%esi
149
150	movl	%ecx,%edx
151	divl	%esi
152	shrl	%esi
153	addl	%eax,%esi
154
155/*
156 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157 * we improve it to 60 bits or so.
158 *
159 * The strategy from now on is to compute new estimates from
160 *      guess := guess + (n - guess^2) / (2 * guess)
161 */
162
163/* First, find the square of the guess */
164	movl	%esi,%eax
165	mull	%esi
166/* guess^2 now in %edx:%eax */
167
168	movl	FPU_fsqrt_arg_1,%ecx
169	subl	%ecx,%eax
170	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
171	sbbl	%ecx,%edx
172	jnc	sqrt_stage_2_positive
173
174/* Subtraction gives a negative result,
175   negate the result before division. */
176	notl	%edx
177	notl	%eax
178	addl	$1,%eax
179	adcl	$0,%edx
180
181	divl	%esi
182	movl	%eax,%ecx
183
184	movl	%edx,%eax
185	divl	%esi
186	jmp	sqrt_stage_2_finish
187
188sqrt_stage_2_positive:
189	divl	%esi
190	movl	%eax,%ecx
191
192	movl	%edx,%eax
193	divl	%esi
194
195	notl	%ecx
196	notl	%eax
197	addl	$1,%eax
198	adcl	$0,%ecx
199
200sqrt_stage_2_finish:
201	sarl	$1,%ecx		/* divide by 2 */
202	rcrl	$1,%eax
203
204	/* Form the new estimate in %esi:%edi */
205	movl	%eax,%edi
206	addl	%ecx,%esi
207
208	jnz	sqrt_stage_2_done	/* result should be [1..2) */
209
210#ifdef PARANOID
211/* It should be possible to get here only if the arg is ffff....ffff */
212	cmpl	$0xffffffff,FPU_fsqrt_arg_1
213	jnz	sqrt_stage_2_error
214#endif /* PARANOID */
215
216/* The best rounded result. */
217	xorl	%eax,%eax
218	decl	%eax
219	movl	%eax,%edi
220	movl	%eax,%esi
221	movl	$0x7fffffff,%eax
222	jmp	sqrt_round_result
223
224#ifdef PARANOID
225sqrt_stage_2_error:
226	pushl	EX_INTERNAL|0x213
227	call	EXCEPTION
228#endif /* PARANOID */
229
230sqrt_stage_2_done:
231
232/* Now the square root has been computed to better than 60 bits. */
233
234/* Find the square of the guess. */
235	movl	%edi,%eax		/* ls word of guess */
236	mull	%edi
237	movl	%edx,FPU_accum_1
238
239	movl	%esi,%eax
240	mull	%esi
241	movl	%edx,FPU_accum_3
242	movl	%eax,FPU_accum_2
243
244	movl	%edi,%eax
245	mull	%esi
246	addl	%eax,FPU_accum_1
247	adcl	%edx,FPU_accum_2
248	adcl	$0,FPU_accum_3
249
250/*	movl	%esi,%eax */
251/*	mull	%edi */
252	addl	%eax,FPU_accum_1
253	adcl	%edx,FPU_accum_2
254	adcl	$0,FPU_accum_3
255
256/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257
258	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
259	subl	%eax,FPU_accum_1
260	movl	FPU_fsqrt_arg_1,%eax
261	sbbl	%eax,FPU_accum_2
262	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
263	sbbl	%eax,FPU_accum_3
264	jnc	sqrt_stage_3_positive
265
266/* Subtraction gives a negative result,
267   negate the result before division */
268	notl	FPU_accum_1
269	notl	FPU_accum_2
270	notl	FPU_accum_3
271	addl	$1,FPU_accum_1
272	adcl	$0,FPU_accum_2
273
274#ifdef PARANOID
275	adcl	$0,FPU_accum_3	/* This must be zero */
276	jz	sqrt_stage_3_no_error
277
278sqrt_stage_3_error:
279	pushl	EX_INTERNAL|0x207
280	call	EXCEPTION
281
282sqrt_stage_3_no_error:
283#endif /* PARANOID */
284
285	movl	FPU_accum_2,%edx
286	movl	FPU_accum_1,%eax
287	divl	%esi
288	movl	%eax,%ecx
289
290	movl	%edx,%eax
291	divl	%esi
292
293	sarl	$1,%ecx		/* divide by 2 */
294	rcrl	$1,%eax
295
296	/* prepare to round the result */
297
298	addl	%ecx,%edi
299	adcl	$0,%esi
300
301	jmp	sqrt_stage_3_finished
302
303sqrt_stage_3_positive:
304	movl	FPU_accum_2,%edx
305	movl	FPU_accum_1,%eax
306	divl	%esi
307	movl	%eax,%ecx
308
309	movl	%edx,%eax
310	divl	%esi
311
312	sarl	$1,%ecx		/* divide by 2 */
313	rcrl	$1,%eax
314
315	/* prepare to round the result */
316
317	notl	%eax		/* Negate the correction term */
318	notl	%ecx
319	addl	$1,%eax
320	adcl	$0,%ecx		/* carry here ==> correction == 0 */
321	adcl	$0xffffffff,%esi
322
323	addl	%ecx,%edi
324	adcl	$0,%esi
325
326sqrt_stage_3_finished:
327
328/*
329 * The result in %esi:%edi:%esi should be good to about 90 bits here,
330 * and the rounding information here does not have sufficient accuracy
331 * in a few rare cases.
332 */
333	cmpl	$0xffffffe0,%eax
334	ja	sqrt_near_exact_x
335
336	cmpl	$0x00000020,%eax
337	jb	sqrt_near_exact
338
339	cmpl	$0x7fffffe0,%eax
340	jb	sqrt_round_result
341
342	cmpl	$0x80000020,%eax
343	jb	sqrt_get_more_precision
344
345sqrt_round_result:
346/* Set up for rounding operations */
347	movl	%eax,%edx
348	movl	%esi,%eax
349	movl	%edi,%ebx
350	movl	PARAM1,%edi
351	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
352	jmp	fpu_reg_round
353
354
355sqrt_near_exact_x:
356/* First, the estimate must be rounded up. */
357	addl	$1,%edi
358	adcl	$0,%esi
359
360sqrt_near_exact:
361/*
362 * This is an easy case because x^1/2 is monotonic.
363 * We need just find the square of our estimate, compare it
364 * with the argument, and deduce whether our estimate is
365 * above, below, or exact. We use the fact that the estimate
366 * is known to be accurate to about 90 bits.
367 */
368	movl	%edi,%eax		/* ls word of guess */
369	mull	%edi
370	movl	%edx,%ebx		/* 2nd ls word of square */
371	movl	%eax,%ecx		/* ls word of square */
372
373	movl	%edi,%eax
374	mull	%esi
375	addl	%eax,%ebx
376	addl	%eax,%ebx
377
378#ifdef PARANOID
379	cmp	$0xffffffb0,%ebx
380	jb	sqrt_near_exact_ok
381
382	cmp	$0x00000050,%ebx
383	ja	sqrt_near_exact_ok
384
385	pushl	EX_INTERNAL|0x214
386	call	EXCEPTION
387
388sqrt_near_exact_ok:
389#endif /* PARANOID */
390
391	or	%ebx,%ebx
392	js	sqrt_near_exact_small
393
394	jnz	sqrt_near_exact_large
395
396	or	%ebx,%edx
397	jnz	sqrt_near_exact_large
398
399/* Our estimate is exactly the right answer */
400	xorl	%eax,%eax
401	jmp	sqrt_round_result
402
403sqrt_near_exact_small:
404/* Our estimate is too small */
405	movl	$0x000000ff,%eax
406	jmp	sqrt_round_result
407
408sqrt_near_exact_large:
409/* Our estimate is too large, we need to decrement it */
410	subl	$1,%edi
411	sbbl	$0,%esi
412	movl	$0xffffff00,%eax
413	jmp	sqrt_round_result
414
415
416sqrt_get_more_precision:
417/* This case is almost the same as the above, except we start
418   with an extra bit of precision in the estimate. */
419	stc			/* The extra bit. */
420	rcll	$1,%edi		/* Shift the estimate left one bit */
421	rcll	$1,%esi
422
423	movl	%edi,%eax		/* ls word of guess */
424	mull	%edi
425	movl	%edx,%ebx		/* 2nd ls word of square */
426	movl	%eax,%ecx		/* ls word of square */
427
428	movl	%edi,%eax
429	mull	%esi
430	addl	%eax,%ebx
431	addl	%eax,%ebx
432
433/* Put our estimate back to its original value */
434	stc			/* The ms bit. */
435	rcrl	$1,%esi		/* Shift the estimate left one bit */
436	rcrl	$1,%edi
437
438#ifdef PARANOID
439	cmp	$0xffffff60,%ebx
440	jb	sqrt_more_prec_ok
441
442	cmp	$0x000000a0,%ebx
443	ja	sqrt_more_prec_ok
444
445	pushl	EX_INTERNAL|0x215
446	call	EXCEPTION
447
448sqrt_more_prec_ok:
449#endif /* PARANOID */
450
451	or	%ebx,%ebx
452	js	sqrt_more_prec_small
453
454	jnz	sqrt_more_prec_large
455
456	or	%ebx,%ecx
457	jnz	sqrt_more_prec_large
458
459/* Our estimate is exactly the right answer */
460	movl	$0x80000000,%eax
461	jmp	sqrt_round_result
462
463sqrt_more_prec_small:
464/* Our estimate is too small */
465	movl	$0x800000ff,%eax
466	jmp	sqrt_round_result
467
468sqrt_more_prec_large:
469/* Our estimate is too large */
470	movl	$0x7fffff00,%eax
471	jmp	sqrt_round_result
472SYM_FUNC_END(wm_sqrt)
473