1/* Compute cubic root of double value. 2 Copyright (C) 1997 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 Contributed by Dirk Alboth <dirka@uni-paderborn.de> and 5 Ulrich Drepper <drepper@cygnus.com>, 1997. 6 7 The GNU C Library is free software; you can redistribute it and/or 8 modify it under the terms of the GNU Lesser General Public 9 License as published by the Free Software Foundation; either 10 version 2.1 of the License, or (at your option) any later version. 11 12 The GNU C Library is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 Lesser General Public License for more details. 16 17 You should have received a copy of the GNU Lesser General Public 18 License along with the GNU C Library; if not, write to the Free 19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 20 02111-1307 USA. */ 21 22#include <machine/asm.h> 23 24#ifdef __ELF__ 25 .section .rodata 26#else 27 .text 28#endif 29 30 .align ALIGNARG(4) 31 ASM_TYPE_DIRECTIVE(f7,@object) 32f7: .double -0.145263899385486377 33 ASM_SIZE_DIRECTIVE(f7) 34 ASM_TYPE_DIRECTIVE(f6,@object) 35f6: .double 0.784932344976639262 36 ASM_SIZE_DIRECTIVE(f6) 37 ASM_TYPE_DIRECTIVE(f5,@object) 38f5: .double -1.83469277483613086 39 ASM_SIZE_DIRECTIVE(f5) 40 ASM_TYPE_DIRECTIVE(f4,@object) 41f4: .double 2.44693122563534430 42 ASM_SIZE_DIRECTIVE(f4) 43 ASM_TYPE_DIRECTIVE(f3,@object) 44f3: .double -2.11499494167371287 45 ASM_SIZE_DIRECTIVE(f3) 46 ASM_TYPE_DIRECTIVE(f2,@object) 47f2: .double 1.50819193781584896 48 ASM_SIZE_DIRECTIVE(f2) 49 ASM_TYPE_DIRECTIVE(f1,@object) 50f1: .double 0.354895765043919860 51 ASM_SIZE_DIRECTIVE(f1) 52 53#define CBRT2 1.2599210498948731648 54#define ONE_CBRT2 0.793700525984099737355196796584 55#define SQR_CBRT2 1.5874010519681994748 56#define ONE_SQR_CBRT2 0.629960524947436582364439673883 57 58 ASM_TYPE_DIRECTIVE(factor,@object) 59factor: .double ONE_SQR_CBRT2 60 .double ONE_CBRT2 61 .double 1.0 62 .double CBRT2 63 .double SQR_CBRT2 64 ASM_SIZE_DIRECTIVE(factor) 65 66 ASM_TYPE_DIRECTIVE(two54,@object) 67two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43 68 ASM_SIZE_DIRECTIVE(two54) 69 70#ifdef PIC 71#define MO(op) op##@GOTOFF(%ebx) 72#define MOX(op,x) op##@GOTOFF(%ebx,x,1) 73#else 74#define MO(op) op 75#define MOX(op,x) op(x) 76#endif 77 78 .text 79ENTRY(__cbrt) 80 movl 4(%esp), %ecx 81 movl 8(%esp), %eax 82 movl %eax, %edx 83 andl $0x7fffffff, %eax 84 orl %eax, %ecx 85 jz 1f 86 xorl %ecx, %ecx 87 cmpl $0x7ff00000, %eax 88 jae 1f 89 90#ifdef PIC 91 pushl %ebx 92 call 3f 933: popl %ebx 94 addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx 95#endif 96 97 cmpl $0x00100000, %eax 98 jae 2f 99 100#ifdef PIC 101 fldl 8(%esp) 102#else 103 fldl 4(%esp) 104#endif 105 fmull MO(two54) 106 movl $-54, %ecx 107#ifdef PIC 108 fstpl 8(%esp) 109 movl 12(%esp), %eax 110#else 111 fstpl 4(%esp) 112 movl 8(%esp), %eax 113#endif 114 movl %eax, %edx 115 andl $0x7fffffff, %eax 116 1172: shrl $20, %eax 118 andl $0x800fffff, %edx 119 subl $1022, %eax 120 orl $0x3fe00000, %edx 121 addl %eax, %ecx 122#ifdef PIC 123 movl %edx, 12(%esp) 124 125 fldl 8(%esp) /* xm */ 126#else 127 movl %edx, 8(%esp) 128 129 fldl 4(%esp) /* xm */ 130#endif 131 fabs 132 133 /* The following code has two tracks: 134 a) compute the normalized cbrt value 135 b) compute xe/3 and xe%3 136 The right track computes the value for b) and this is done 137 in an optimized way by avoiding division. 138 139 But why two tracks at all? Very easy: efficiency. Some FP 140 instruction can overlap with a certain amount of integer (and 141 FP) instructions. So we get (except for the imull) all 142 instructions for free. */ 143 144 fld %st(0) /* xm : xm */ 145 146 fmull MO(f7) /* f7*xm : xm */ 147 movl $1431655766, %eax 148 faddl MO(f6) /* f6+f7*xm : xm */ 149 imull %ecx 150 fmul %st(1) /* (f6+f7*xm)*xm : xm */ 151 movl %ecx, %eax 152 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */ 153 sarl $31, %eax 154 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */ 155 subl %eax, %edx 156 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */ 157 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ 158 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ 159 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ 160 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ 161 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ 162 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ 163 164 fld %st /* u : u : xm */ 165 fmul %st(1) /* u*u : u : xm */ 166 fld %st(2) /* xm : u*u : u : xm */ 167 fadd %st /* 2*xm : u*u : u : xm */ 168 fxch %st(1) /* u*u : 2*xm : u : xm */ 169 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ 170 movl %edx, %eax 171 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ 172 leal (%edx,%edx,2),%edx 173 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ 174 subl %edx, %ecx 175 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ 176 shll $3, %ecx 177 fmulp /* u*(t2+2*xm) : 2*t2+xm */ 178 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ 179 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ 180 pushl %eax 181 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ 182 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ 183 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ 184 popl %edx 185#ifdef PIC 186 movl 12(%esp), %eax 187 popl %ebx 188#else 189 movl 8(%esp), %eax 190#endif 191 testl %eax, %eax 192 fstp %st(1) 193 jns 4f 194 fchs 1954: ret 196 197 /* Return the argument. */ 1981: fldl 4(%esp) 199 ret 200END(__cbrt) 201weak_alias (__cbrt, cbrt) 202