/* mpf_set_str (dest, string, base) -- Convert the string STRING
   in base BASE to a float in dest.  If BASE is zero, decimal is assumed;
   the leading characters of STRING are not used to determine the base.
4
5Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free
6Software Foundation, Inc.
7
8This file is part of the GNU MP Library.
9
10The GNU MP Library is free software; you can redistribute it and/or modify
11it under the terms of either:
12
13  * the GNU Lesser General Public License as published by the Free
14    Software Foundation; either version 3 of the License, or (at your
15    option) any later version.
16
17or
18
19  * the GNU General Public License as published by the Free Software
20    Foundation; either version 2 of the License, or (at your option) any
21    later version.
22
23or both in parallel, as here.
24
25The GNU MP Library is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28for more details.
29
30You should have received copies of the GNU General Public License and the
31GNU Lesser General Public License along with the GNU MP Library.  If not,
32see https://www.gnu.org/licenses/.  */
33
34/*
35  This still needs work, as suggested by some FIXME comments.
36  1. Don't depend on superfluous mantissa digits.
37  2. Allocate temp space more cleverly.
38  3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
39*/
40
41#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
42
43#include "config.h"
44
#include <ctype.h>
#include <limits.h>    /* for LONG_MAX */
#include <stdlib.h>
#include <string.h>
48
49#if HAVE_LANGINFO_H
50#include <langinfo.h>  /* for nl_langinfo */
51#endif
52
53#if HAVE_LOCALE_H
54#include <locale.h>    /* for localeconv */
55#endif
56
57#include "gmp-impl.h"
58#include "longlong.h"
59
60
61#define digit_value_tab __gmp_digit_value_tab
62
63/* Compute base^exp and return the most significant prec limbs in rp[].
64   Put the count of omitted low limbs in *ign.
65   Return the actual size (which might be less than prec).  */
66static mp_size_t
67mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
68		    mp_limb_t base, mp_exp_t exp,
69		    mp_size_t prec, mp_ptr tp)
70{
71  mp_size_t ign;		/* counts number of ignored low limbs in r */
72  mp_size_t off;		/* keeps track of offset where value starts */
73  mp_ptr passed_rp = rp;
74  mp_size_t rn;
75  int cnt;
76  int i;
77
78  rp[0] = base;
79  rn = 1;
80  off = 0;
81  ign = 0;
82  count_leading_zeros (cnt, exp);
83  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
84    {
85      mpn_sqr (tp, rp + off, rn);
86      rn = 2 * rn;
87      rn -= tp[rn - 1] == 0;
88      ign <<= 1;
89
90      off = 0;
91      if (rn > prec)
92	{
93	  ign += rn - prec;
94	  off = rn - prec;
95	  rn = prec;
96	}
97      MP_PTR_SWAP (rp, tp);
98
99      if (((exp >> i) & 1) != 0)
100	{
101	  mp_limb_t cy;
102	  cy = mpn_mul_1 (rp, rp + off, rn, base);
103	  rp[rn] = cy;
104	  rn += cy != 0;
105	  off = 0;
106	}
107    }
108
109  if (rn > prec)
110    {
111      ign += rn - prec;
112      rp += rn - prec;
113      rn = prec;
114    }
115
116  MPN_COPY_INCR (passed_rp, rp + off, rn);
117  *ignp = ign;
118  return rn;
119}
120
121int
122mpf_set_str (mpf_ptr x, const char *str, int base)
123{
124  size_t str_size;
125  char *s, *begs;
126  size_t i, j;
127  int c;
128  int negative;
129  char *dotpos;
130  const char *expptr;
131  int exp_base;
132  const char  *point = GMP_DECIMAL_POINT;
133  size_t      pointlen = strlen (point);
134  const unsigned char *digit_value;
135  int incr;
136  size_t n_zeros_skipped;
137
138  TMP_DECL;
139
140  c = (unsigned char) *str;
141
142  /* Skip whitespace.  */
143  while (isspace (c))
144    c = (unsigned char) *++str;
145
146  negative = 0;
147  if (c == '-')
148    {
149      negative = 1;
150      c = (unsigned char) *++str;
151    }
152
153  /* Default base to decimal.  */
154  if (base == 0)
155    base = 10;
156
157  exp_base = base;
158
159  if (base < 0)
160    {
161      exp_base = 10;
162      base = -base;
163    }
164
165  digit_value = digit_value_tab;
166  if (base > 36)
167    {
168      /* For bases > 36, use the collating sequence
169	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
170      digit_value += 208;
171      if (base > 62)
172	return -1;		/* too large base */
173    }
174
175  /* Require at least one digit, possibly after an initial decimal point.  */
176  if (digit_value[c] >= base)
177    {
178      /* not a digit, must be a decimal point */
179      for (i = 0; i < pointlen; i++)
180	if (str[i] != point[i])
181	  return -1;
182      if (digit_value[(unsigned char) str[pointlen]] >= base)
183	return -1;
184    }
185
186  /* Locate exponent part of the input.  Look from the right of the string,
187     since the exponent is usually a lot shorter than the mantissa.  */
188  expptr = NULL;
189  str_size = strlen (str);
190  for (i = str_size - 1; i > 0; i--)
191    {
192      c = (unsigned char) str[i];
193      if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
194	{
195	  expptr = str + i + 1;
196	  str_size = i;
197	  break;
198	}
199    }
200
201  TMP_MARK;
202  s = begs = (char *) TMP_ALLOC (str_size + 1);
203
204  incr = 0;
205  n_zeros_skipped = 0;
206  dotpos = NULL;
207
208  /* Loop through mantissa, converting it from ASCII to raw byte values.  */
209  for (i = 0; i < str_size; i++)
210    {
211      c = (unsigned char) *str;
212      if (!isspace (c))
213	{
214	  int dig;
215
216	  for (j = 0; j < pointlen; j++)
217	    if (str[j] != point[j])
218	      goto not_point;
219	  if (1)
220	    {
221	      if (dotpos != 0)
222		{
223		  /* already saw a decimal point, another is invalid */
224		  TMP_FREE;
225		  return -1;
226		}
227	      dotpos = s;
228	      str += pointlen - 1;
229	      i += pointlen - 1;
230	    }
231	  else
232	    {
233	    not_point:
234	      dig = digit_value[c];
235	      if (dig >= base)
236		{
237		  TMP_FREE;
238		  return -1;
239		}
240	      *s = dig;
241	      incr |= dig != 0;
242	      s += incr;	/* Increment after first non-0 digit seen. */
243	      if (dotpos != NULL)
244		/* Count skipped zeros between radix point and first non-0
245		   digit. */
246		n_zeros_skipped += 1 - incr;
247	    }
248	}
249      c = (unsigned char) *++str;
250    }
251
252  str_size = s - begs;
253
254  {
255    long exp_in_base;
256    mp_size_t ra, ma, rn, mn;
257    int cnt;
258    mp_ptr mp, tp, rp;
259    mp_exp_t exp_in_limbs;
260    mp_size_t prec = PREC(x) + 1;
261    int divflag;
262    mp_size_t madj, radj;
263
264#if 0
265    size_t n_chars_needed;
266
267    /* This needs careful testing.  Leave disabled for now.  */
268    /* Just consider the relevant leading digits of the mantissa.  */
269    LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
270    if (str_size > n_chars_needed)
271      str_size = n_chars_needed;
272#endif
273
274    if (str_size == 0)
275      {
276	SIZ(x) = 0;
277	EXP(x) = 0;
278	TMP_FREE;
279	return 0;
280      }
281
282    LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
283    mp = TMP_ALLOC_LIMBS (ma);
284    mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
285
286    madj = 0;
287    /* Ignore excess limbs in MP,MSIZE.  */
288    if (mn > prec)
289      {
290	madj = mn - prec;
291	mp += mn - prec;
292	mn = prec;
293      }
294
295    if (expptr != 0)
296      {
297	/* Scan and convert the exponent, in base exp_base.  */
298	long dig, minus, plusminus;
299	c = (unsigned char) *expptr;
300	minus = -(long) (c == '-');
301	plusminus = minus | -(long) (c == '+');
302	expptr -= plusminus;			/* conditional increment */
303	c = (unsigned char) *expptr++;
304	dig = digit_value[c];
305	if (dig >= exp_base)
306	  {
307	    TMP_FREE;
308	    return -1;
309	  }
310	exp_in_base = dig;
311	c = (unsigned char) *expptr++;
312	dig = digit_value[c];
313	while (dig < exp_base)
314	  {
315	    exp_in_base = exp_in_base * exp_base;
316	    exp_in_base += dig;
317	    c = (unsigned char) *expptr++;
318	    dig = digit_value[c];
319	  }
320	exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
321      }
322    else
323      exp_in_base = 0;
324    if (dotpos != 0)
325      exp_in_base -= s - dotpos + n_zeros_skipped;
326    divflag = exp_in_base < 0;
327    exp_in_base = ABS (exp_in_base);
328
329    if (exp_in_base == 0)
330      {
331	MPN_COPY (PTR(x), mp, mn);
332	SIZ(x) = negative ? -mn : mn;
333	EXP(x) = mn + madj;
334	TMP_FREE;
335	return 0;
336      }
337
338    ra = 2 * (prec + 1);
339    TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra);
340    rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
341
342    if (divflag)
343      {
344#if 0
345	/* FIXME: Should use mpn_div_q here.  */
346	...
347	mpn_div_q (tp, mp, mn, rp, rn, scratch);
348	...
349#else
350	mp_ptr qp;
351	mp_limb_t qlimb;
352	if (mn < rn)
353	  {
354	    /* Pad out MP,MSIZE for current divrem semantics.  */
355	    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
356	    MPN_ZERO (tmp, rn - mn);
357	    MPN_COPY (tmp + rn - mn, mp, mn);
358	    mp = tmp;
359	    madj -= rn - mn;
360	    mn = rn;
361	  }
362	if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
363	  {
364	    mp_limb_t cy;
365	    count_leading_zeros (cnt, rp[rn - 1]);
366	    cnt -= GMP_NAIL_BITS;
367	    mpn_lshift (rp, rp, rn, cnt);
368	    cy = mpn_lshift (mp, mp, mn, cnt);
369	    if (cy)
370	      mp[mn++] = cy;
371	  }
372
373	qp = TMP_ALLOC_LIMBS (prec + 1);
374	qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
375	tp = qp;
376	exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
377	rn = prec;
378	if (qlimb != 0)
379	  {
380	    tp[prec] = qlimb;
381	    /* Skip the least significant limb not to overrun the destination
382	       variable.  */
383	    tp++;
384	  }
385#endif
386      }
387    else
388      {
389	tp = TMP_ALLOC_LIMBS (rn + mn);
390	if (rn > mn)
391	  mpn_mul (tp, rp, rn, mp, mn);
392	else
393	  mpn_mul (tp, mp, mn, rp, rn);
394	rn += mn;
395	rn -= tp[rn - 1] == 0;
396	exp_in_limbs = rn + madj + radj;
397
398	if (rn > prec)
399	  {
400	    tp += rn - prec;
401	    rn = prec;
402	    exp_in_limbs += 0;
403	  }
404      }
405
406    MPN_COPY (PTR(x), tp, rn);
407    SIZ(x) = negative ? -rn : rn;
408    EXP(x) = exp_in_limbs;
409    TMP_FREE;
410    return 0;
411  }
412}
413