fpu_cordic.c revision 1.4
1/*	$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $	*/
2
3/*
4 * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $");
30
31#include <machine/ieee.h>
32
33#include "fpu_emulate.h"
34
35/*
36 * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
37 * The most significant byte of sp_m0 is EXP (signed byte) and the rest
38 * of sp_m0 is fp_mant[0].
39 */
40struct sfpn {
41	uint32_t sp_m0;
42	uint32_t sp_m1;
43	uint32_t sp_m2;
44};
45
46#if defined(CORDIC_BOOTSTRAP)
47/*
48 * This is a bootstrap code to generate a pre-calculated tables such as
49 * atan_table[].  However, it's just for reference.
50 * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
51 * and modify these files as a userland application.
52 */
53
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <float.h>
58
59static void prepare_cordic_const(struct fpemu *);
60static struct fpn *fpu_gain1_cordic(struct fpemu *);
61static struct fpn *fpu_atan_taylor(struct fpemu *);
62static void printf_fpn(const struct fpn *);
63static void printf_sfpn(const struct sfpn *);
64static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
65
66static struct sfpn atan_table[EXT_FRACBITS];
67static struct fpn inv_gain1;
68
69int
70main(int argc, char *argv[])
71{
72	struct fpemu dummyfe;
73	int i;
74	struct fpn fp;
75
76	memset(&dummyfe, 0, sizeof(dummyfe));
77	prepare_cordic_const(&dummyfe);
78
79	/* output as source code */
80	printf("static const struct sfpn atan_table[] = {\n");
81	for (i = 0; i < EXT_FRACBITS; i++) {
82		printf("\t");
83		printf_sfpn(&atan_table[i]);
84		printf(",\n");
85	}
86	printf("};\n\n");
87
88	printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
89	printf_fpn(&inv_gain1);
90	printf(";\n\n");
91}
92
93/*
94 * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
95 * and fpu_atan_taylor() as bootstrap.
96 */
97static void
98prepare_cordic_const(struct fpemu *fe)
99{
100	struct fpn t;
101	struct fpn x;
102	struct fpn *r;
103	int i;
104
105	/* atan_table */
106	fpu_const(&t, FPU_CONST_1);
107	for (i = 0; i < EXT_FRACBITS; i++) {
108		/* atan(t) */
109		CPYFPN(&fe->fe_f2, &t);
110		r = fpu_atan_taylor(fe);
111		fpn_to_sfpn(&atan_table[i], r);
112
113		/* t /= 2 */
114		t.fp_exp--;
115	}
116
117	/* inv_gain1 = 1 / gain1cordic() */
118	r = fpu_gain1_cordic(fe);
119	CPYFPN(&fe->fe_f2, r);
120	fpu_const(&fe->fe_f1, FPU_CONST_1);
121	r = fpu_div(fe);
122	CPYFPN(&inv_gain1, r);
123}
124
125static struct fpn *
126fpu_gain1_cordic(struct fpemu *fe)
127{
128	struct fpn x;
129	struct fpn y;
130	struct fpn z;
131	struct fpn v;
132
133	fpu_const(&x, FPU_CONST_1);
134	fpu_const(&y, FPU_CONST_0);
135	fpu_const(&z, FPU_CONST_0);
136	CPYFPN(&v, &x);
137	v.fp_sign = !v.fp_sign;
138
139	fpu_cordit1(fe, &x, &y, &z, &v);
140	CPYFPN(&fe->fe_f2, &x);
141	return &fe->fe_f2;
142}
143
144/*
145 * arctan(x) = pi/4 (for |x| = 1)
146 *
147 *                 x^3   x^5   x^7
148 * arctan(x) = x - --- + --- - --- + ...   (for |x| < 1)
149 *                  3     5     7
150 */
151static struct fpn *
152fpu_atan_taylor(struct fpemu *fe)
153{
154	struct fpn res;
155	struct fpn x2;
156	struct fpn s0;
157	struct fpn *s1;
158	struct fpn *r;
159	uint32_t k;
160
161	/* arctan(1) is pi/4 */
162	if (fe->fe_f2.fp_exp == 0) {
163		fpu_const(&fe->fe_f2, FPU_CONST_PI);
164		fe->fe_f2.fp_exp -= 2;
165		return &fe->fe_f2;
166	}
167
168	/* s0 := x */
169	CPYFPN(&s0, &fe->fe_f2);
170
171	/* res := x */
172	CPYFPN(&res, &fe->fe_f2);
173
174	/* x2 := x * x */
175	CPYFPN(&fe->fe_f1, &fe->fe_f2);
176	r = fpu_mul(fe);
177	CPYFPN(&x2, r);
178
179	k = 3;
180	for (;;) {
181		/* s1 := -s0 * x2 */
182		CPYFPN(&fe->fe_f1, &s0);
183		CPYFPN(&fe->fe_f2, &x2);
184		s1 = fpu_mul(fe);
185		s1->fp_sign ^= 1;
186		CPYFPN(&fe->fe_f1, s1);
187
188		/* s0 := s1 for next loop */
189		CPYFPN(&s0, s1);
190
191		/* s1 := s1 / k */
192		fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
193		s1 = fpu_div(fe);
194
195		/* break if s1 is enough small */
196		if (ISZERO(s1))
197			break;
198		if (res.fp_exp - s1->fp_exp >= FP_NMANT)
199			break;
200
201		/* res += s1 */
202		CPYFPN(&fe->fe_f2, s1);
203		CPYFPN(&fe->fe_f1, &res);
204		r = fpu_add(fe);
205		CPYFPN(&res, r);
206
207		k += 2;
208	}
209
210	CPYFPN(&fe->fe_f2, &res);
211	return &fe->fe_f2;
212}
213
214static void
215printf_fpn(const struct fpn *fp)
216{
217	printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
218		fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
219		fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
220}
221
222static void
223printf_sfpn(const struct sfpn *sp)
224{
225	printf("{ 0x%08x, 0x%08x, 0x%08x, }",
226		sp->sp_m0, sp->sp_m1, sp->sp_m2);
227}
228
229static void
230fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
231{
232	sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
233	sp->sp_m1 = fp->fp_mant[1];
234	sp->sp_m2 = fp->fp_mant[2];
235}
236
237#else /* CORDIC_BOOTSTRAP */
238
239static const struct sfpn atan_table[] = {
240	{ 0xff06487e, 0xd5110b46, 0x11a80000, },
241	{ 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
242	{ 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
243	{ 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
244	{ 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
245	{ 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
246	{ 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
247	{ 0xf807fff5, 0x556eeea5, 0xcb403117, },
248	{ 0xf707fffd, 0x5556eeed, 0xca5d8956, },
249	{ 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
250	{ 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
251	{ 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
252	{ 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
253	{ 0xf207ffff, 0xff555555, 0x6eeeeee8, },
254	{ 0xf107ffff, 0xffd55555, 0x56eeeeed, },
255	{ 0xf007ffff, 0xfff55555, 0x556eeeed, },
256	{ 0xef07ffff, 0xfffd5555, 0x5556eeed, },
257	{ 0xee07ffff, 0xffff5555, 0x55556eed, },
258	{ 0xed07ffff, 0xffffd555, 0x555556ed, },
259	{ 0xec07ffff, 0xfffff555, 0x5555556d, },
260	{ 0xeb07ffff, 0xfffffd55, 0x55555555, },
261	{ 0xea07ffff, 0xffffff55, 0x55555554, },
262	{ 0xe907ffff, 0xffffffd5, 0x55555554, },
263	{ 0xe807ffff, 0xfffffff5, 0x55555554, },
264	{ 0xe707ffff, 0xfffffffd, 0x55555554, },
265	{ 0xe607ffff, 0xffffffff, 0x55555554, },
266	{ 0xe507ffff, 0xffffffff, 0xd5555554, },
267	{ 0xe407ffff, 0xffffffff, 0xf5555554, },
268	{ 0xe307ffff, 0xffffffff, 0xfd555554, },
269	{ 0xe207ffff, 0xffffffff, 0xff555554, },
270	{ 0xe107ffff, 0xffffffff, 0xffd55554, },
271	{ 0xe007ffff, 0xffffffff, 0xfff55554, },
272	{ 0xdf07ffff, 0xffffffff, 0xfffd5554, },
273	{ 0xde07ffff, 0xffffffff, 0xffff5554, },
274	{ 0xdd07ffff, 0xffffffff, 0xffffd554, },
275	{ 0xdc07ffff, 0xffffffff, 0xfffff554, },
276	{ 0xdb07ffff, 0xffffffff, 0xfffffd54, },
277	{ 0xda07ffff, 0xffffffff, 0xffffff54, },
278	{ 0xd907ffff, 0xffffffff, 0xffffffd4, },
279	{ 0xd807ffff, 0xffffffff, 0xfffffff4, },
280	{ 0xd707ffff, 0xffffffff, 0xfffffffc, },
281	{ 0xd7040000, 0x00000000, 0x00000000, },
282	{ 0xd6040000, 0x00000000, 0x00000000, },
283	{ 0xd5040000, 0x00000000, 0x00000000, },
284	{ 0xd4040000, 0x00000000, 0x00000000, },
285	{ 0xd3040000, 0x00000000, 0x00000000, },
286	{ 0xd2040000, 0x00000000, 0x00000000, },
287	{ 0xd1040000, 0x00000000, 0x00000000, },
288	{ 0xd0040000, 0x00000000, 0x00000000, },
289	{ 0xcf040000, 0x00000000, 0x00000000, },
290	{ 0xce040000, 0x00000000, 0x00000000, },
291	{ 0xcd040000, 0x00000000, 0x00000000, },
292	{ 0xcc040000, 0x00000000, 0x00000000, },
293	{ 0xcb040000, 0x00000000, 0x00000000, },
294	{ 0xca040000, 0x00000000, 0x00000000, },
295	{ 0xc9040000, 0x00000000, 0x00000000, },
296	{ 0xc8040000, 0x00000000, 0x00000000, },
297	{ 0xc7040000, 0x00000000, 0x00000000, },
298	{ 0xc6040000, 0x00000000, 0x00000000, },
299	{ 0xc5040000, 0x00000000, 0x00000000, },
300	{ 0xc4040000, 0x00000000, 0x00000000, },
301	{ 0xc3040000, 0x00000000, 0x00000000, },
302	{ 0xc2040000, 0x00000000, 0x00000000, },
303	{ 0xc1040000, 0x00000000, 0x00000000, },
304};
305
306const struct fpn fpu_cordic_inv_gain1 =
307	{ 1, 0,  -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
308
309#endif /* CORDIC_BOOTSTRAP */
310
311static inline void
312sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
313{
314	fp->fp_class = FPC_NUM;
315	fp->fp_sign = 0;
316	fp->fp_sticky = 0;
317	fp->fp_exp = s->sp_m0 >> 24;
318	if (fp->fp_exp & 0x80) {
319		fp->fp_exp |= 0xffffff00;
320	}
321	fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
322	fp->fp_mant[1] = s->sp_m1;
323	fp->fp_mant[2] = s->sp_m2;
324}
325
326void
327fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
328	const struct fpn *vecmode)
329{
330	struct fpn t;
331	struct fpn x;
332	struct fpn y;
333	struct fpn z;
334	struct fpn *r;
335	int i;
336	int sign;
337
338	fpu_const(&t, FPU_CONST_1);
339	CPYFPN(&x, x0);
340	CPYFPN(&y, y0);
341	CPYFPN(&z, z0);
342
343	for (i = 0; i < EXT_FRACBITS; i++) {
344		struct fpn x1;
345
346		/* y < vecmode */
347		CPYFPN(&fe->fe_f1, &y);
348		CPYFPN(&fe->fe_f2, vecmode);
349		fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
350		r = fpu_add(fe);
351
352		if ((vecmode->fp_sign == 0 && r->fp_sign) ||
353		    (vecmode->fp_sign && z.fp_sign == 0)) {
354			sign = 1;
355		} else {
356			sign = 0;
357		}
358
359		/* y * t */
360		CPYFPN(&fe->fe_f1, &y);
361		CPYFPN(&fe->fe_f2, &t);
362		r = fpu_mul(fe);
363
364		/*
365		 * x1 = x - y*t (if sign)
366		 * x1 = x + y*t
367		 */
368		CPYFPN(&fe->fe_f2, r);
369		if (sign)
370			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
371		CPYFPN(&fe->fe_f1, &x);
372		r = fpu_add(fe);
373		CPYFPN(&x1, r);
374
375		/* x * t */
376		CPYFPN(&fe->fe_f1, &x);
377		CPYFPN(&fe->fe_f2, &t);
378		r = fpu_mul(fe);
379
380		/*
381		 * y = y + x*t (if sign)
382		 * y = y - x*t
383		 */
384		CPYFPN(&fe->fe_f2, r);
385		if (!sign)
386			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
387		CPYFPN(&fe->fe_f1, &y);
388		r = fpu_add(fe);
389		CPYFPN(&y, r);
390
391		/*
392		 * z = z - atan_table[i] (if sign)
393		 * z = z + atan_table[i]
394		 */
395		CPYFPN(&fe->fe_f1, &z);
396		sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
397		if (sign)
398			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
399		r = fpu_add(fe);
400		CPYFPN(&z, r);
401
402		/* x = x1 */
403		CPYFPN(&x, &x1);
404
405		/* t /= 2 */
406		t.fp_exp--;
407	}
408
409	CPYFPN(x0, &x);
410	CPYFPN(y0, &y);
411	CPYFPN(z0, &z);
412}
413