1
2/*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001 Free Software Foundation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21/****************************************************************************/
22/*  MODULE_NAME:mpsqrt.c                                                    */
23/*                                                                          */
24/*  FUNCTION:mpsqrt                                                         */
25/*           fastiroot                                                      */
26/*                                                                          */
27/* FILES NEEDED:endian.h mpa.h mpsqrt.h                                     */
28/*              mpa.c                                                       */
29/* Multi-Precision square root function subroutine for precision p >= 4.    */
30/* The relative error is bounded by 3.501*r**(1-p), where r=2**24.          */
31/*                                                                          */
32/****************************************************************************/
#include <stdint.h>

#include "endian.h"
#include "mpa.h"
35
36/****************************************************************************/
37/* Multi-Precision square root function subroutine for precision p >= 4.    */
38/* The relative error is bounded by 3.501*r**(1-p), where r=2**24.          */
/* The routine receives two pointers to multi-precision numbers:            */
/* x (the input) and y (the output), plus the precision p as an integer.    */
/* It computes sqrt(*x) and stores the result in *y.                        */
42/****************************************************************************/
43
44double fastiroot(double);
45
46void __mpsqrt(mp_no *x, mp_no *y, int p) {
47#include "mpsqrt.h"
48
49  int i,m,ex,ey;
50  double dx,dy;
51  mp_no
52    mphalf   = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
53                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
54                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}},
55    mp3halfs = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
56                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
57                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
58  mp_no mpxn,mpz,mpu,mpt1,mpt2;
59
60  /* Prepare multi-precision 1/2 and 3/2 */
61  mphalf.e  =0;  mphalf.d[0]  =ONE;  mphalf.d[1]  =HALFRAD;
62  mp3halfs.e=1;  mp3halfs.d[0]=ONE;  mp3halfs.d[1]=ONE;  mp3halfs.d[2]=HALFRAD;
63
64  ex=EX;      ey=EX/2;     __cpy(x,&mpxn,p);    mpxn.e -= (ey+ey);
65  __mp_dbl(&mpxn,&dx,p);   dy=fastiroot(dx);    __dbl_mp(dy,&mpu,p);
66  __mul(&mpxn,&mphalf,&mpz,p);
67
68  m=mp[p];
69  for (i=0; i<m; i++) {
70    __mul(&mpu,&mpu,&mpt1,p);
71    __mul(&mpt1,&mpz,&mpt2,p);
72    __sub(&mp3halfs,&mpt2,&mpt1,p);
73    __mul(&mpu,&mpt1,&mpt2,p);
74    __cpy(&mpt2,&mpu,p);
75  }
76  __mul(&mpxn,&mpu,y,p);  EY += ey;
77
78  return;
79}
80
81/***********************************************************/
82/* Compute a double precision approximation for 1/sqrt(x)  */
83/* with the relative error bounded by 2**-51.              */
84/***********************************************************/
85double fastiroot(double x) {
86  union {long i[2]; double d;} p,q;
87  double y,z, t;
88  long n;
89  static const double c0 = 0.99674, c1 = -0.53380, c2 = 0.45472, c3 = -0.21553;
90
91  p.d = x;
92  p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF ) | 0x3FE00000 ;
93  q.d = x;
94  y = p.d;
95  z = y -1.0;
96  n = (q.i[HIGH_HALF] - p.i[HIGH_HALF])>>1;
97  z = ((c3*z + c2)*z + c1)*z + c0;            /* 2**-7         */
98  z = z*(1.5 - 0.5*y*z*z);                    /* 2**-14        */
99  p.d = z*(1.5 - 0.5*y*z*z);                  /* 2**-28        */
100  p.i[HIGH_HALF] -= n;
101  t = x*p.d;
102  return p.d*(1.5 - 0.5*p.d*t);
103}
104