1#include <asm/ppc_asm.h>
2#include <asm/reg.h>
3
4/*
5 * The routines below are in assembler so we can closely control the
6 * usage of floating-point registers.  These routines must be called
7 * with preempt disabled.
8 */
#ifdef CONFIG_PPC32
	/*
	 * 32-bit build: the constants are stored as single-precision
	 * words and fetched with lfs through an absolute address.
	 */
	.data
fpzero:
	.long	0x00000000	/* 0.0 in single-precision FP */
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

/* Load constant 'name' into FP register 'fr'.  Clobbers r11. */
#define LDCONST(fr, name)		\
	lis	r11,name@ha;		\
	lfs	fr,name@l(r11)
#else	/* !CONFIG_PPC32 */

	/*
	 * Otherwise the constants are double-precision TOC entries,
	 * fetched with lfd relative to r2.
	 */
	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0				/* 0.0 */
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

/* Load constant 'name' into FP register 'fr' from the TOC (via r2). */
#define LDCONST(fr, name)		\
	lfd	fr,name@toc(r2)
#endif	/* CONFIG_PPC32 */
34
35	.text
/*
 * fpenable - internal prologue: turn on floating point and zero FPSCR.
 *
 * Do not call this from C; it does not use the normal calling
 * convention.  The routines in this file enter it with "bl" after
 * stashing their own return address in r12, and later leave through
 * fpdisable, which undoes everything done here.
 *
 * State left behind for fpdisable:
 *   r1   -> a freshly pushed 64-byte stack frame
 *   r10  =  MSR as it was on entry
 *   fr31 =  FPSCR as it was on entry
 *   8(r1), 16(r1), 24(r1) hold the entry values of fr31, fr1, fr0
 * r11 is used as scratch; fr1 holds zero on return.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)		/* push a 64-byte frame */
#else
	stdu	r1,-64(r1)		/* push a 64-byte frame */
#endif
	mfmsr	r10			/* r10 = MSR on entry (kept for fpdisable) */
	ori	r11,r10,MSR_FP
	mtmsr	r11			/* same MSR with MSR_FP set */
	isync
	/* FP is usable from here: save the FPRs we are about to use */
	stfd	fr31,8(r1)
	stfd	fr1,16(r1)
	stfd	fr0,24(r1)
	mffs	fr31			/* fr31 = FPSCR on entry */
	LDCONST(fr1, fpzero)
	MTFSF_L(fr1)			/* write the zero in fr1 to FPSCR */
	blr
57
/*
 * fpdisable - internal epilogue, the counterpart of fpenable.
 *
 * The routines in this file reach it with a plain branch, so the final
 * blr returns to whoever called that routine.  Expects:
 *   r12  = return address to go back to
 *   r10  = MSR value to restore
 *   fr31 = FPSCR value to restore
 *   r1   -> the 64-byte frame pushed by fpenable, holding the saved
 *           fr31, fr1 and fr0 at offsets 8, 16 and 24
 */
fpdisable:
	MTFSF_L(fr31)			/* put back the saved FPSCR first ... */
	lfd	fr0,24(r1)		/* ... then reload the saved FPRs */
	lfd	fr1,16(r1)
	lfd	fr31,8(r1)
	mtlr	r12
	mtmsr	r10			/* MSR back to its entry value */
	isync
	addi	r1,r1,64		/* pop the frame */
	blr
68
/*
 * vaddfp - add two vectors of four single-precision floats.
 *
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of the four elements: dst[i] = a[i] + b[i].
 *
 * Scratch: r0, r6, r10, r11, r12, CTR.  fr0 and fr1 are saved and
 * restored by fpenable/fpdisable.
 */
_GLOBAL(vaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvaddfp_loop:
	lfsx	fr0,r4,r6		/* a[i] */
	lfsx	fr1,r5,r6		/* b[i] */
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6		/* dst[i] = a[i] + b[i] */
	addi	r6,r6,4
	bdnz	.Lvaddfp_loop
	b	fpdisable
85
/*
 * vsubfp - subtract two vectors of four single-precision floats.
 *
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of the four elements: dst[i] = a[i] - b[i].
 *
 * Scratch: r0, r6, r10, r11, r12, CTR.  fr0 and fr1 are saved and
 * restored by fpenable/fpdisable.
 */
_GLOBAL(vsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvsubfp_loop:
	lfsx	fr0,r4,r6		/* a[i] */
	lfsx	fr1,r5,r6		/* b[i] */
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6		/* dst[i] = a[i] - b[i] */
	addi	r6,r6,4
	bdnz	.Lvsubfp_loop
	b	fpdisable
102
/*
 * vmaddfp - fused multiply-add on vectors of four single-precision floats.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of the four elements: dst[i] = a[i] * c[i] + b[i].
 *
 * Scratch: r0, r7, r10, r11, r12, CTR.  fr2 is preserved here in the
 * frame at 32(r1); fr0 and fr1 are handled by fpenable/fpdisable.
 */
_GLOBAL(vmaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* extra FPR needed for the third operand */
	li	r7,0			/* r7 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvmaddfp_loop:
	lfsx	fr0,r4,r7		/* a[i] */
	lfsx	fr1,r5,r7		/* b[i] */
	lfsx	fr2,r6,r7		/* c[i] */
	fmadds	fr0,fr0,fr2,fr1		/* a[i] * c[i] + b[i] */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvmaddfp_loop
	lfd	fr2,32(r1)
	b	fpdisable
122
/*
 * vnmsubfp - negative multiply-subtract on vectors of four
 * single-precision floats.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of the four elements: dst[i] = -(a[i] * c[i] - b[i]).
 *
 * Scratch: r0, r7, r10, r11, r12, CTR.  fr2 is preserved here in the
 * frame at 32(r1); fr0 and fr1 are handled by fpenable/fpdisable.
 */
_GLOBAL(vnmsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* extra FPR needed for the third operand */
	li	r7,0			/* r7 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvnmsubfp_loop:
	lfsx	fr0,r4,r7		/* a[i] */
	lfsx	fr1,r5,r7		/* b[i] */
	lfsx	fr2,r6,r7		/* c[i] */
	fnmsubs	fr0,fr0,fr2,fr1		/* -(a[i] * c[i] - b[i]) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvnmsubfp_loop
	lfd	fr2,32(r1)
	b	fpdisable
142
/*
 * vrefp - vector reciprocal estimate on four single-precision floats.
 *
 * No estimate instruction is used; each result is a full 1.0/x divide.
 * r3 -> destination, r4 -> source.
 *
 * Scratch: r0, r6, r10, r11, r12, CTR.  fr0 and fr1 are saved and
 * restored by fpenable/fpdisable.
 */
_GLOBAL(vrefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0, the loop-invariant dividend */
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvrefp_loop:
	lfsx	fr0,r4,r6		/* x = src[i] */
	fdivs	fr0,fr1,fr0		/* 1.0 / x */
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvrefp_loop
	b	fpdisable
160
/*
 * vrsqrtefp - vector reciprocal square-root estimate on four
 * single-precision floats.
 *
 * frsqrte supplies the starting estimate r for 1/sqrt(s); two
 * Newton-Raphson steps, each r' = r + 0.5 * r * (1 - s * r * r),
 * then refine it to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 *
 * Scratch: r0, r6, r10, r11, r12, CTR.  fr2-fr5 are preserved here in
 * the frame at 32..56(r1); fr0 and fr1 are handled by
 * fpenable/fpdisable.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr5,56(r1)
	stfd	fr4,48(r1)
	stfd	fr3,40(r1)
	stfd	fr2,32(r1)
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	li	r6,0			/* r6 = byte offset of current element */
	li	r0,4
	mtctr	r0			/* four elements to process */
.Lvrsqrtefp_loop:
	lfsx	fr0,r4,r6		/* s = src[i] */
	frsqrte	fr1,fr0			/* r = initial estimate of 1/sqrt(s) */
	/* first Newton-Raphson step */
	fmuls	fr2,fr1,fr5		/* 0.5 * r */
	fmuls	fr3,fr1,fr0		/* r * s */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r += 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step */
	fmuls	fr2,fr1,fr5		/* 0.5 * r */
	fmuls	fr3,fr1,fr0		/* r * s */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r += 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6		/* dst[i] = r */
	addi	r6,r6,4
	bdnz	.Lvrsqrtefp_loop
	lfd	fr2,32(r1)
	lfd	fr3,40(r1)
	lfd	fr4,48(r1)
	lfd	fr5,56(r1)
	b	fpdisable
197