1/* Compute cubic root of double value.
2   Copyright (C) 1997 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
5   Ulrich Drepper <drepper@cygnus.com>, 1997.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library; if not, write to the Free
19   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20   02111-1307 USA.  */
21
22#include <machine/asm.h>
23
24#ifdef __ELF__
25        .section .rodata
26#else
27        .text
28#endif
29
30        .align ALIGNARG(4)
31        ASM_TYPE_DIRECTIVE(f7,@object)
32f7:	.double -0.145263899385486377
33	ASM_SIZE_DIRECTIVE(f7)
34        ASM_TYPE_DIRECTIVE(f6,@object)
35f6:	.double 0.784932344976639262
36	ASM_SIZE_DIRECTIVE(f6)
37        ASM_TYPE_DIRECTIVE(f5,@object)
38f5:	.double -1.83469277483613086
39	ASM_SIZE_DIRECTIVE(f5)
40        ASM_TYPE_DIRECTIVE(f4,@object)
41f4:	.double 2.44693122563534430
42	ASM_SIZE_DIRECTIVE(f4)
43        ASM_TYPE_DIRECTIVE(f3,@object)
44f3:	.double -2.11499494167371287
45	ASM_SIZE_DIRECTIVE(f3)
46        ASM_TYPE_DIRECTIVE(f2,@object)
47f2:	.double 1.50819193781584896
48	ASM_SIZE_DIRECTIVE(f2)
49        ASM_TYPE_DIRECTIVE(f1,@object)
50f1:	.double 0.354895765043919860
51	ASM_SIZE_DIRECTIVE(f1)
52
53#define CBRT2		1.2599210498948731648
54#define ONE_CBRT2	0.793700525984099737355196796584
55#define SQR_CBRT2	1.5874010519681994748
56#define ONE_SQR_CBRT2	0.629960524947436582364439673883
57
58	ASM_TYPE_DIRECTIVE(factor,@object)
59factor:	.double ONE_SQR_CBRT2
60	.double ONE_CBRT2
61	.double 1.0
62	.double CBRT2
63	.double SQR_CBRT2
64	ASM_SIZE_DIRECTIVE(factor)
65
66        ASM_TYPE_DIRECTIVE(two54,@object)
67two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
68        ASM_SIZE_DIRECTIVE(two54)
69
70#ifdef PIC
71#define MO(op) op##@GOTOFF(%ebx)
72#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
73#else
74#define MO(op) op
75#define MOX(op,x) op(x)
76#endif
77
78	.text
79ENTRY(__cbrt)
80	movl	4(%esp), %ecx
81	movl	8(%esp), %eax
82	movl	%eax, %edx
83	andl	$0x7fffffff, %eax
84	orl	%eax, %ecx
85	jz	1f
86	xorl	%ecx, %ecx
87	cmpl	$0x7ff00000, %eax
88	jae	1f
89
90#ifdef PIC
91	pushl	%ebx
92	call	3f
933:	popl	%ebx
94	addl    $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
95#endif
96
97	cmpl	$0x00100000, %eax
98	jae	2f
99
100#ifdef PIC
101	fldl	8(%esp)
102#else
103	fldl	4(%esp)
104#endif
105	fmull	MO(two54)
106	movl	$-54, %ecx
107#ifdef PIC
108	fstpl	8(%esp)
109	movl	12(%esp), %eax
110#else
111	fstpl	4(%esp)
112	movl	8(%esp), %eax
113#endif
114	movl	%eax, %edx
115	andl	$0x7fffffff, %eax
116
1172:	shrl	$20, %eax
118	andl	$0x800fffff, %edx
119	subl	$1022, %eax
120	orl	$0x3fe00000, %edx
121	addl	%eax, %ecx
122#ifdef PIC
123	movl	%edx, 12(%esp)
124
125	fldl	8(%esp)			/* xm */
126#else
127	movl	%edx, 8(%esp)
128
129	fldl	4(%esp)			/* xm */
130#endif
131	fabs
132
133	/* The following code has two tracks:
134	    a) compute the normalized cbrt value
135	    b) compute xe/3 and xe%3
136	   The right track computes the value for b) and this is done
137	   in an optimized way by avoiding division.
138
139	   But why two tracks at all?  Very easy: efficiency.  Some FP
140	   instruction can overlap with a certain amount of integer (and
141	   FP) instructions.  So we get (except for the imull) all
142	   instructions for free.  */
143
144	fld	%st(0)			/* xm : xm */
145
146	fmull	MO(f7)			/* f7*xm : xm */
147			movl	$1431655766, %eax
148	faddl	MO(f6)			/* f6+f7*xm : xm */
149			imull	%ecx
150	fmul	%st(1)			/* (f6+f7*xm)*xm : xm */
151			movl	%ecx, %eax
152	faddl	MO(f5)			/* f5+(f6+f7*xm)*xm : xm */
153			sarl	$31, %eax
154	fmul	%st(1)			/* (f5+(f6+f7*xm)*xm)*xm : xm */
155			subl	%eax, %edx
156	faddl	MO(f4)			/* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
157	fmul	%st(1)			/* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
158	faddl	MO(f3)			/* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
159	fmul	%st(1)			/* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
160	faddl	MO(f2)			/* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
161	fmul	%st(1)			/* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
162	faddl	MO(f1)			/* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
163
164	fld	%st			/* u : u : xm */
165	fmul	%st(1)			/* u*u : u : xm */
166	fld	%st(2)			/* xm : u*u : u : xm */
167	fadd	%st			/* 2*xm : u*u : u : xm */
168	fxch	%st(1)			/* u*u : 2*xm : u : xm */
169	fmul	%st(2)			/* t2:=u*u*u : 2*xm : u : xm */
170			movl	%edx, %eax
171	fadd	%st, %st(1)		/* t2 : t2+2*xm : u : xm */
172			leal	(%edx,%edx,2),%edx
173	fadd	%st(0)			/* 2*t2 : t2+2*xm : u : xm */
174			subl	%edx, %ecx
175	faddp	%st, %st(3)		/* t2+2*xm : u : 2*t2+xm */
176			shll	$3, %ecx
177	fmulp				/* u*(t2+2*xm) : 2*t2+xm */
178	fdivp	%st, %st(1)		/* u*(t2+2*xm)/(2*t2+xm) */
179	fmull	MOX(16+factor,%ecx)	/* u*(t2+2*xm)/(2*t2+xm)*FACT */
180	pushl	%eax
181	fildl	(%esp)			/* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
182	fxch				/* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
183	fscale				/* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
184	popl	%edx
185#ifdef PIC
186	movl	12(%esp), %eax
187	popl	%ebx
188#else
189	movl	8(%esp), %eax
190#endif
191	testl	%eax, %eax
192	fstp	%st(1)
193	jns	4f
194	fchs
1954:	ret
196
197	/* Return the argument.  */
1981:	fldl	4(%esp)
199	ret
200END(__cbrt)
201weak_alias (__cbrt, cbrt)
202