fpu_cordic.c revision 1.1
1/*	$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $	*/
2
3/*
4 * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $");
30
31#include <machine/ieee.h>
32
33#include "fpu_emulate.h"
34
35/*
36 * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
37 * The most significant byte of sp_m0 is EXP (signed byte) and the rest
38 * of sp_m0 is fp_mant[0].
39 */
40struct sfpn {
41	uint32_t sp_m0;
42	uint32_t sp_m1;
43	uint32_t sp_m2;
44};
45
46#if defined(CORDIC_BOOTSTRAP)
47/*
48 * This is a bootstrap code to generate a pre-calculated tables such as
49 * atan_table[] and atanh_table[].  However, it's just for reference.
50 * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
51 * and modify these files as a userland application.
52 */
53
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <float.h>
58
59static void prepare_cordic_const(struct fpemu *);
60static struct fpn *fpu_gain1_cordic(struct fpemu *);
61static struct fpn *fpu_gain2_cordic(struct fpemu *);
62static struct fpn *fpu_atan_tayler(struct fpemu *);
63static void printf_fpn(const struct fpn *);
64static void printf_sfpn(const struct sfpn *);
65static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
66
67static struct sfpn atan_table[EXT_FRACBITS];
68static struct sfpn atanh_table[EXT_FRACBITS];
69static struct fpn inv_gain1;
70static struct fpn inv_gain2;
71
72int
73main(int argc, char *argv[])
74{
75	struct fpemu dummyfe;
76	int i;
77	struct fpn fp;
78
79	memset(&dummyfe, 0, sizeof(dummyfe));
80	prepare_cordic_const(&dummyfe);
81
82	/* output as source code */
83	printf("static const struct sfpn atan_table[] = {\n");
84	for (i = 0; i < EXT_FRACBITS; i++) {
85		printf("\t");
86		printf_sfpn(&atan_table[i]);
87		printf(",\n");
88	}
89	printf("};\n\n");
90
91	printf("static const struct sfpn atanh_table[] = {\n");
92	for (i = 0; i < EXT_FRACBITS; i++) {
93		printf("\t");
94		printf_sfpn(&atanh_table[i]);
95		printf(",\n");
96	}
97	printf("};\n\n");
98
99	printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
100	printf_fpn(&inv_gain1);
101	printf(";\n\n");
102
103	printf("const struct fpn fpu_cordic_inv_gain2 =\n\t");
104	printf_fpn(&inv_gain2);
105	printf(";\n\n");
106}
107
108/*
109 * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
110 * and fpu_atan_tayler() as bootstrap.
111 */
112static void
113prepare_cordic_const(struct fpemu *fe)
114{
115	struct fpn t;
116	struct fpn x;
117	struct fpn *r;
118	int i;
119
120	/* atan_table and atanh_table */
121	fpu_const(&t, FPU_CONST_1);
122	for (i = 0; i < EXT_FRACBITS; i++) {
123		/* atan(t) */
124		CPYFPN(&fe->fe_f2, &t);
125		r = fpu_atan_tayler(fe);
126		fpn_to_sfpn(&atan_table[i], r);
127
128		/* t /= 2 */
129		t.fp_exp--;
130
131		/* (1-t) */
132		fpu_const(&fe->fe_f1, FPU_CONST_1);
133		CPYFPN(&fe->fe_f2, &t);
134		fe->fe_f2.fp_sign = 1;
135		r = fpu_add(fe);
136		CPYFPN(&x, r);
137
138		/* (1+t) */
139		fpu_const(&fe->fe_f1, FPU_CONST_1);
140		CPYFPN(&fe->fe_f2, &t);
141		r = fpu_add(fe);
142
143		/* r = (1+t)/(1-t) */
144		CPYFPN(&fe->fe_f1, r);
145		CPYFPN(&fe->fe_f2, &x);
146		r = fpu_div(fe);
147
148		/* r = log(r) */
149		CPYFPN(&fe->fe_f2, r);
150		r = fpu_logn(fe);
151
152		/* r /= 2 */
153		r->fp_exp--;
154
155		fpn_to_sfpn(&atanh_table[i], r);
156	}
157
158	/* inv_gain1 = 1 / gain1cordic() */
159	r = fpu_gain1_cordic(fe);
160	CPYFPN(&fe->fe_f2, r);
161	fpu_const(&fe->fe_f1, FPU_CONST_1);
162	r = fpu_div(fe);
163	CPYFPN(&inv_gain1, r);
164
165	/* inv_gain2 = 1 / gain2cordic() */
166	r = fpu_gain2_cordic(fe);
167	CPYFPN(&fe->fe_f2, r);
168	fpu_const(&fe->fe_f1, FPU_CONST_1);
169	r = fpu_div(fe);
170	CPYFPN(&inv_gain2, r);
171}
172
173static struct fpn *
174fpu_gain1_cordic(struct fpemu *fe)
175{
176	struct fpn x;
177	struct fpn y;
178	struct fpn z;
179	struct fpn v;
180
181	fpu_const(&x, FPU_CONST_1);
182	fpu_const(&y, FPU_CONST_0);
183	fpu_const(&z, FPU_CONST_0);
184	CPYFPN(&v, &x);
185	v.fp_sign = !v.fp_sign;
186
187	fpu_cordit1(fe, &x, &y, &z, &v);
188	CPYFPN(&fe->fe_f2, &x);
189	return &fe->fe_f2;
190}
191
192static struct fpn *
193fpu_gain2_cordic(struct fpemu *fe)
194{
195	struct fpn x;
196	struct fpn y;
197	struct fpn z;
198	struct fpn v;
199
200	fpu_const(&x, FPU_CONST_1);
201	fpu_const(&y, FPU_CONST_0);
202	fpu_const(&z, FPU_CONST_0);
203	CPYFPN(&v, &x);
204	v.fp_sign = !v.fp_sign;
205
206	fpu_cordit2(fe, &x, &y, &z, &v);
207	CPYFPN(&fe->fe_f2, &x);
208	return &fe->fe_f2;
209}
210
211/*
212 * arctan(x) = pi/4 (for |x| = 1)
213 *
214 *                 x^3   x^5   x^7
215 * arctan(x) = x - --- + --- - --- + ...   (for |x| < 1)
216 *                  3     5     7
217 */
218static struct fpn *
219fpu_atan_tayler(struct fpemu *fe)
220{
221	struct fpn res;
222	struct fpn x2;
223	struct fpn s0;
224	struct fpn *s1;
225	struct fpn *r;
226	uint32_t k;
227
228	/* arctan(1) is pi/4 */
229	if (fe->fe_f2.fp_exp == 0) {
230		fpu_const(&fe->fe_f2, FPU_CONST_PI);
231		fe->fe_f2.fp_exp -= 2;
232		return &fe->fe_f2;
233	}
234
235	/* s0 := x */
236	CPYFPN(&s0, &fe->fe_f2);
237
238	/* res := x */
239	CPYFPN(&res, &fe->fe_f2);
240
241	/* x2 := x * x */
242	CPYFPN(&fe->fe_f1, &fe->fe_f2);
243	r = fpu_mul(fe);
244	CPYFPN(&x2, r);
245
246	k = 3;
247	for (;;) {
248		/* s1 := -s0 * x2 */
249		CPYFPN(&fe->fe_f1, &s0);
250		CPYFPN(&fe->fe_f2, &x2);
251		s1 = fpu_mul(fe);
252		s1->fp_sign ^= 1;
253		CPYFPN(&fe->fe_f1, s1);
254
255		/* s0 := s1 for next loop */
256		CPYFPN(&s0, s1);
257
258		/* s1 := s1 / k */
259		fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
260		s1 = fpu_div(fe);
261
262		/* break if s1 is enough small */
263		if (ISZERO(s1))
264			break;
265		if (res.fp_exp - s1->fp_exp >= FP_NMANT)
266			break;
267
268		/* res += s1 */
269		CPYFPN(&fe->fe_f2, s1);
270		CPYFPN(&fe->fe_f1, &res);
271		r = fpu_add(fe);
272		CPYFPN(&res, r);
273
274		k += 2;
275	}
276
277	CPYFPN(&fe->fe_f2, &res);
278	return &fe->fe_f2;
279}
280
281static void
282printf_fpn(const struct fpn *fp)
283{
284	printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
285		fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
286		fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
287}
288
289static void
290printf_sfpn(const struct sfpn *sp)
291{
292	printf("{ 0x%08x, 0x%08x, 0x%08x, }",
293		sp->sp_m0, sp->sp_m1, sp->sp_m2);
294}
295
296static void
297fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
298{
299	sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
300	sp->sp_m1 = fp->fp_mant[1];
301	sp->sp_m2 = fp->fp_mant[2];
302}
303
304#else /* CORDIC_BOOTSTRAP */
305
306static const struct sfpn atan_table[] = {
307	{ 0xff06487e, 0xd5110b46, 0x11a80000, },
308	{ 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
309	{ 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
310	{ 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
311	{ 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
312	{ 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
313	{ 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
314	{ 0xf807fff5, 0x556eeea5, 0xcb403117, },
315	{ 0xf707fffd, 0x5556eeed, 0xca5d8956, },
316	{ 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
317	{ 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
318	{ 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
319	{ 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
320	{ 0xf207ffff, 0xff555555, 0x6eeeeee8, },
321	{ 0xf107ffff, 0xffd55555, 0x56eeeeed, },
322	{ 0xf007ffff, 0xfff55555, 0x556eeeed, },
323	{ 0xef07ffff, 0xfffd5555, 0x5556eeed, },
324	{ 0xee07ffff, 0xffff5555, 0x55556eed, },
325	{ 0xed07ffff, 0xffffd555, 0x555556ed, },
326	{ 0xec07ffff, 0xfffff555, 0x5555556d, },
327	{ 0xeb07ffff, 0xfffffd55, 0x55555555, },
328	{ 0xea07ffff, 0xffffff55, 0x55555554, },
329	{ 0xe907ffff, 0xffffffd5, 0x55555554, },
330	{ 0xe807ffff, 0xfffffff5, 0x55555554, },
331	{ 0xe707ffff, 0xfffffffd, 0x55555554, },
332	{ 0xe607ffff, 0xffffffff, 0x55555554, },
333	{ 0xe507ffff, 0xffffffff, 0xd5555554, },
334	{ 0xe407ffff, 0xffffffff, 0xf5555554, },
335	{ 0xe307ffff, 0xffffffff, 0xfd555554, },
336	{ 0xe207ffff, 0xffffffff, 0xff555554, },
337	{ 0xe107ffff, 0xffffffff, 0xffd55554, },
338	{ 0xe007ffff, 0xffffffff, 0xfff55554, },
339	{ 0xdf07ffff, 0xffffffff, 0xfffd5554, },
340	{ 0xde07ffff, 0xffffffff, 0xffff5554, },
341	{ 0xdd07ffff, 0xffffffff, 0xffffd554, },
342	{ 0xdc07ffff, 0xffffffff, 0xfffff554, },
343	{ 0xdb07ffff, 0xffffffff, 0xfffffd54, },
344	{ 0xda07ffff, 0xffffffff, 0xffffff54, },
345	{ 0xd907ffff, 0xffffffff, 0xffffffd4, },
346	{ 0xd807ffff, 0xffffffff, 0xfffffff4, },
347	{ 0xd707ffff, 0xffffffff, 0xfffffffc, },
348	{ 0xd7040000, 0x00000000, 0x00000000, },
349	{ 0xd6040000, 0x00000000, 0x00000000, },
350	{ 0xd5040000, 0x00000000, 0x00000000, },
351	{ 0xd4040000, 0x00000000, 0x00000000, },
352	{ 0xd3040000, 0x00000000, 0x00000000, },
353	{ 0xd2040000, 0x00000000, 0x00000000, },
354	{ 0xd1040000, 0x00000000, 0x00000000, },
355	{ 0xd0040000, 0x00000000, 0x00000000, },
356	{ 0xcf040000, 0x00000000, 0x00000000, },
357	{ 0xce040000, 0x00000000, 0x00000000, },
358	{ 0xcd040000, 0x00000000, 0x00000000, },
359	{ 0xcc040000, 0x00000000, 0x00000000, },
360	{ 0xcb040000, 0x00000000, 0x00000000, },
361	{ 0xca040000, 0x00000000, 0x00000000, },
362	{ 0xc9040000, 0x00000000, 0x00000000, },
363	{ 0xc8040000, 0x00000000, 0x00000000, },
364	{ 0xc7040000, 0x00000000, 0x00000000, },
365	{ 0xc6040000, 0x00000000, 0x00000000, },
366	{ 0xc5040000, 0x00000000, 0x00000000, },
367	{ 0xc4040000, 0x00000000, 0x00000000, },
368	{ 0xc3040000, 0x00000000, 0x00000000, },
369	{ 0xc2040000, 0x00000000, 0x00000000, },
370	{ 0xc1040000, 0x00000000, 0x00000000, },
371};
372
373static const struct sfpn atanh_table[] = {
374	{ 0xff0464fa, 0x9eab40c2, 0xa5dc43f6, },
375	{ 0xfe04162b, 0xbea04514, 0x69ca8e4a, },
376	{ 0xfd040562, 0x4727abbd, 0xda654b67, },
377	{ 0xfc040156, 0x22b4dd6b, 0x372a679c, },
378	{ 0xfb040055, 0x62246bb8, 0x92d60b35, },
379	{ 0xfa040015, 0x56222b47, 0x2637d656, },
380	{ 0xf9040005, 0x55622246, 0xb4dcf86e, },
381	{ 0xf8040001, 0x55562222, 0xb46bb307, },
382	{ 0xf7040000, 0x55556222, 0x246b45cd, },
383	{ 0xf6040000, 0x15555622, 0x222b465b, },
384	{ 0xf5040000, 0x05555562, 0x2222467f, },
385	{ 0xf4040000, 0x01555556, 0x22221eaf, },
386	{ 0xf3040000, 0x00555555, 0x62222213, },
387	{ 0xf2040000, 0x00155555, 0x56221221, },
388	{ 0xf1040000, 0x00055555, 0x556221a2, },
389	{ 0xf0040000, 0x00015555, 0x5556221e, },
390	{ 0xef040000, 0x00005555, 0x55552222, },
391	{ 0xee040000, 0x00001555, 0x55555222, },
392	{ 0xed040000, 0x00000555, 0x55555522, },
393	{ 0xec040000, 0x00000155, 0x55555552, },
394	{ 0xeb040000, 0x00000055, 0x554d5555, },
395	{ 0xea040000, 0x00000015, 0x55545555, },
396	{ 0xe9040000, 0x00000005, 0x55553555, },
397	{ 0xe8040000, 0x00000001, 0x55555155, },
398	{ 0xe7040000, 0x00000000, 0x555554d5, },
399	{ 0xe6040000, 0x00000000, 0x15555545, },
400	{ 0xe5040000, 0x00000000, 0x05555553, },
401	{ 0xe307ffff, 0xffffffff, 0xfaaaaaaa, },
402	{ 0xe207ffff, 0xffffffff, 0xfeaaaaaa, },
403	{ 0xe107ffff, 0xffffffff, 0xffaaaaaa, },
404	{ 0xe007ffff, 0xffffffff, 0xffeaaaaa, },
405	{ 0xdf07ffff, 0xffffffff, 0xfffaaaaa, },
406	{ 0xde07ffff, 0xffffffff, 0xfffeaaaa, },
407	{ 0xdd07ffff, 0xffffffff, 0xffffaaaa, },
408	{ 0xdc07ffff, 0xffffffff, 0xffffeaaa, },
409	{ 0xdb07ffff, 0xffffffff, 0xfffffaaa, },
410	{ 0xda07ffff, 0xffffffff, 0xfffffeaa, },
411	{ 0xd907ffff, 0xffffffff, 0xffffffaa, },
412	{ 0xd807ffff, 0xffffffff, 0xffffffea, },
413	{ 0xd707ffff, 0xffffffff, 0xfffffffa, },
414	{ 0xd607ffff, 0xffffffff, 0xfffffffe, },
415	{ 0xd507ffff, 0xfffffe00, 0x00000000, },
416	{ 0xd407ffff, 0xffffff00, 0x00000000, },
417	{ 0xd307ffff, 0xffffff80, 0x00000000, },
418	{ 0xd207ffff, 0xffffffc0, 0x00000000, },
419	{ 0xd107ffff, 0xffffffe0, 0x00000000, },
420	{ 0xd007ffff, 0xfffffff0, 0x00000000, },
421	{ 0xcf07ffff, 0xfffffff8, 0x00000000, },
422	{ 0xce07ffff, 0xfffffffc, 0x00000000, },
423	{ 0xcd07ffff, 0xfffffffe, 0x00000000, },
424	{ 0xcc07ffff, 0xffffffff, 0x00000000, },
425	{ 0xcb07ffff, 0xffffffff, 0x80000000, },
426	{ 0xca07ffff, 0xffffffff, 0xc0000000, },
427	{ 0xc907ffff, 0xffffffff, 0xe0000000, },
428	{ 0xc807ffff, 0xffffffff, 0xf0000000, },
429	{ 0xc707ffff, 0xffffffff, 0xf8000000, },
430	{ 0xc607ffff, 0xffffffff, 0xfc000000, },
431	{ 0xc507ffff, 0xffffffff, 0xfe000000, },
432	{ 0xc407ffff, 0xffffffff, 0xff000000, },
433	{ 0xc307ffff, 0xffffffff, 0xff800000, },
434	{ 0xc207ffff, 0xffffffff, 0xffc00000, },
435	{ 0xc107ffff, 0xffffffff, 0xffe00000, },
436	{ 0xc007ffff, 0xffffffff, 0xfff00000, },
437	{ 0xbf07ffff, 0xffffffff, 0xfff80000, },
438};
439
440const struct fpn fpu_cordic_inv_gain1 =
441	{ 1, 0,  -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
442
443const struct fpn fpu_cordic_inv_gain2 =
444	{ 1, 0,   0, 1, { 0x0004d483, 0xec3803fc, 0xc5ff12f8, }, };
445
446#endif /* CORDIC_BOOTSTRAP */
447
448static inline void
449sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
450{
451	fp->fp_class = FPC_NUM;
452	fp->fp_sign = 0;
453	fp->fp_sticky = 0;
454	fp->fp_exp = s->sp_m0 >> 24;
455	if (fp->fp_exp & 0x80) {
456		fp->fp_exp |= 0xffffff00;
457	}
458	fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
459	fp->fp_mant[1] = s->sp_m1;
460	fp->fp_mant[2] = s->sp_m2;
461}
462
463void
464fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
465	const struct fpn *vecmode)
466{
467	struct fpn t;
468	struct fpn x;
469	struct fpn y;
470	struct fpn z;
471	struct fpn *r;
472	int i;
473	int sign;
474
475	fpu_const(&t, FPU_CONST_1);
476	CPYFPN(&x, x0);
477	CPYFPN(&y, y0);
478	CPYFPN(&z, z0);
479
480	for (i = 0; i < EXT_FRACBITS; i++) {
481		struct fpn x1;
482
483		/* y < vecmode */
484		CPYFPN(&fe->fe_f1, &y);
485		CPYFPN(&fe->fe_f2, vecmode);
486		fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
487		r = fpu_add(fe);
488
489		if ((vecmode->fp_sign == 0 && r->fp_sign) ||
490		    (vecmode->fp_sign && z.fp_sign == 0)) {
491			sign = 1;
492		} else {
493			sign = 0;
494		}
495
496		/* y * t */
497		CPYFPN(&fe->fe_f1, &y);
498		CPYFPN(&fe->fe_f2, &t);
499		r = fpu_mul(fe);
500
501		/*
502		 * x1 = x - y*t (if sign)
503		 * x1 = x + y*t
504		 */
505		CPYFPN(&fe->fe_f2, r);
506		if (sign)
507			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
508		CPYFPN(&fe->fe_f1, &x);
509		r = fpu_add(fe);
510		CPYFPN(&x1, r);
511
512		/* x * t */
513		CPYFPN(&fe->fe_f1, &x);
514		CPYFPN(&fe->fe_f2, &t);
515		r = fpu_mul(fe);
516
517		/*
518		 * y = y + x*t (if sign)
519		 * y = y - x*t
520		 */
521		CPYFPN(&fe->fe_f2, r);
522		if (!sign)
523			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
524		CPYFPN(&fe->fe_f1, &y);
525		r = fpu_add(fe);
526		CPYFPN(&y, r);
527
528		/*
529		 * z = z - atan_table[i] (if sign)
530		 * z = z + atan_table[i]
531		 */
532		CPYFPN(&fe->fe_f1, &z);
533		sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
534		if (sign)
535			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
536		r = fpu_add(fe);
537		CPYFPN(&z, r);
538
539		/* x = x1 */
540		CPYFPN(&x, &x1);
541
542		/* t /= 2 */
543		t.fp_exp--;
544	}
545
546	CPYFPN(x0, &x);
547	CPYFPN(y0, &y);
548	CPYFPN(z0, &z);
549}
550
551void
552fpu_cordit2(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
553	const struct fpn *vecmode)
554{
555	struct fpn t;
556	struct fpn x;
557	struct fpn y;
558	struct fpn z;
559	struct fpn *r;
560	int i;
561	int k;
562	int sign;
563
564	/* t = 0.5 */
565	fpu_const(&t, FPU_CONST_1);
566	t.fp_exp--;
567
568	CPYFPN(&x, x0);
569	CPYFPN(&y, y0);
570	CPYFPN(&z, z0);
571
572	k = 3;
573	for (i = 0; i < EXT_FRACBITS; i++) {
574		struct fpn x1;
575		int j;
576
577		for (j = 0; j < 2; j++) {
578			if ((vecmode->fp_sign == 0 && y.fp_sign) ||
579			    (vecmode->fp_sign && z.fp_sign == 0)) {
580				sign = 0;
581			} else {
582				sign = 1;
583			}
584
585			/* y * t */
586			CPYFPN(&fe->fe_f1, &y);
587			CPYFPN(&fe->fe_f2, &t);
588			r = fpu_mul(fe);
589
590			/*
591			 * x1 = x + y*t
592			 * x1 = x - y*t (if sign)
593			 */
594			CPYFPN(&fe->fe_f2, r);
595			if (sign)
596				fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
597			CPYFPN(&fe->fe_f1, &x);
598			r = fpu_add(fe);
599			CPYFPN(&x1, r);
600
601			/* x * t */
602			CPYFPN(&fe->fe_f1, &x);
603			CPYFPN(&fe->fe_f2, &t);
604			r = fpu_mul(fe);
605
606			/*
607			 * y = y + x*t
608			 * y = y - x*t (if sign)
609			 */
610			CPYFPN(&fe->fe_f2, r);
611			if (sign)
612				fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
613			CPYFPN(&fe->fe_f1, &y);
614			r = fpu_add(fe);
615			CPYFPN(&y, r);
616
617			/*
618			 * z = z + atanh_table[i] (if sign)
619			 * z = z - atanh_table[i]
620			 */
621			CPYFPN(&fe->fe_f1, &z);
622			sfpn_to_fpn(&fe->fe_f2, &atanh_table[i]);
623			if (!sign)
624				fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
625			r = fpu_add(fe);
626			CPYFPN(&z, r);
627
628			/* x = x1 */
629			CPYFPN(&x, &x1);
630
631			if (k > 0) {
632				k--;
633				break;
634			} else {
635				k = 3;
636			}
637		}
638
639		/* t /= 2 */
640		t.fp_exp--;
641	}
642
643	CPYFPN(x0, &x);
644	CPYFPN(y0, &y);
645	CPYFPN(z0, &z);
646}
647