1/* mpf_set_str (dest, string, base) -- Convert the string STRING 2 in base BASE to a float in dest. If BASE is zero, the leading characters 3 of STRING is used to figure out the base. 4 5Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free 6Software Foundation, Inc. 7 8This file is part of the GNU MP Library. 9 10The GNU MP Library is free software; you can redistribute it and/or modify 11it under the terms of either: 12 13 * the GNU Lesser General Public License as published by the Free 14 Software Foundation; either version 3 of the License, or (at your 15 option) any later version. 16 17or 18 19 * the GNU General Public License as published by the Free Software 20 Foundation; either version 2 of the License, or (at your option) any 21 later version. 22 23or both in parallel, as here. 24 25The GNU MP Library is distributed in the hope that it will be useful, but 26WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 28for more details. 29 30You should have received copies of the GNU General Public License and the 31GNU Lesser General Public License along with the GNU MP Library. If not, 32see https://www.gnu.org/licenses/. */ 33 34/* 35 This still needs work, as suggested by some FIXME comments. 36 1. Don't depend on superfluous mantissa digits. 37 2. Allocate temp space more cleverly. 38 3. Use mpn_div_q instead of mpn_lshift+mpn_divrem. 39*/ 40 41#define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */ 42 43#include "config.h" 44 45#include <stdlib.h> 46#include <string.h> 47#include <ctype.h> 48 49#if HAVE_LANGINFO_H 50#include <langinfo.h> /* for nl_langinfo */ 51#endif 52 53#if HAVE_LOCALE_H 54#include <locale.h> /* for localeconv */ 55#endif 56 57#include "gmp-impl.h" 58#include "longlong.h" 59 60 61#define digit_value_tab __gmp_digit_value_tab 62 63/* Compute base^exp and return the most significant prec limbs in rp[]. 64 Put the count of omitted low limbs in *ign. 65 Return the actual size (which might be less than prec). */ 66static mp_size_t 67mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp, 68 mp_limb_t base, mp_exp_t exp, 69 mp_size_t prec, mp_ptr tp) 70{ 71 mp_size_t ign; /* counts number of ignored low limbs in r */ 72 mp_size_t off; /* keeps track of offset where value starts */ 73 mp_ptr passed_rp = rp; 74 mp_size_t rn; 75 int cnt; 76 int i; 77 78 rp[0] = base; 79 rn = 1; 80 off = 0; 81 ign = 0; 82 count_leading_zeros (cnt, exp); 83 for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--) 84 { 85 mpn_sqr (tp, rp + off, rn); 86 rn = 2 * rn; 87 rn -= tp[rn - 1] == 0; 88 ign <<= 1; 89 90 off = 0; 91 if (rn > prec) 92 { 93 ign += rn - prec; 94 off = rn - prec; 95 rn = prec; 96 } 97 MP_PTR_SWAP (rp, tp); 98 99 if (((exp >> i) & 1) != 0) 100 { 101 mp_limb_t cy; 102 cy = mpn_mul_1 (rp, rp + off, rn, base); 103 rp[rn] = cy; 104 rn += cy != 0; 105 off = 0; 106 } 107 } 108 109 if (rn > prec) 110 { 111 ign += rn - prec; 112 rp += rn - prec; 113 rn = prec; 114 } 115 116 MPN_COPY_INCR (passed_rp, rp + off, rn); 117 *ignp = ign; 118 return rn; 119} 120 121int 122mpf_set_str (mpf_ptr x, const char *str, int base) 123{ 124 size_t str_size; 125 char *s, *begs; 126 size_t i, j; 127 int c; 128 int negative; 129 char *dotpos; 130 const char *expptr; 131 int exp_base; 132 const char *point = GMP_DECIMAL_POINT; 133 size_t pointlen = strlen (point); 134 const unsigned char *digit_value; 135 int incr; 136 size_t n_zeros_skipped; 137 138 TMP_DECL; 139 140 c = (unsigned char) *str; 141 142 /* Skip whitespace. */ 143 while (isspace (c)) 144 c = (unsigned char) *++str; 145 146 negative = 0; 147 if (c == '-') 148 { 149 negative = 1; 150 c = (unsigned char) *++str; 151 } 152 153 /* Default base to decimal. */ 154 if (base == 0) 155 base = 10; 156 157 exp_base = base; 158 159 if (base < 0) 160 { 161 exp_base = 10; 162 base = -base; 163 } 164 165 digit_value = digit_value_tab; 166 if (base > 36) 167 { 168 /* For bases > 36, use the collating sequence 169 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz. */ 170 digit_value += 208; 171 if (base > 62) 172 return -1; /* too large base */ 173 } 174 175 /* Require at least one digit, possibly after an initial decimal point. */ 176 if (digit_value[c] >= base) 177 { 178 /* not a digit, must be a decimal point */ 179 for (i = 0; i < pointlen; i++) 180 if (str[i] != point[i]) 181 return -1; 182 if (digit_value[(unsigned char) str[pointlen]] >= base) 183 return -1; 184 } 185 186 /* Locate exponent part of the input. Look from the right of the string, 187 since the exponent is usually a lot shorter than the mantissa. */ 188 expptr = NULL; 189 str_size = strlen (str); 190 for (i = str_size - 1; i > 0; i--) 191 { 192 c = (unsigned char) str[i]; 193 if (c == '@' || (base <= 10 && (c == 'e' || c == 'E'))) 194 { 195 expptr = str + i + 1; 196 str_size = i; 197 break; 198 } 199 } 200 201 TMP_MARK; 202 s = begs = (char *) TMP_ALLOC (str_size + 1); 203 204 incr = 0; 205 n_zeros_skipped = 0; 206 dotpos = NULL; 207 208 /* Loop through mantissa, converting it from ASCII to raw byte values. */ 209 for (i = 0; i < str_size; i++) 210 { 211 c = (unsigned char) *str; 212 if (!isspace (c)) 213 { 214 int dig; 215 216 for (j = 0; j < pointlen; j++) 217 if (str[j] != point[j]) 218 goto not_point; 219 if (1) 220 { 221 if (dotpos != 0) 222 { 223 /* already saw a decimal point, another is invalid */ 224 TMP_FREE; 225 return -1; 226 } 227 dotpos = s; 228 str += pointlen - 1; 229 i += pointlen - 1; 230 } 231 else 232 { 233 not_point: 234 dig = digit_value[c]; 235 if (dig >= base) 236 { 237 TMP_FREE; 238 return -1; 239 } 240 *s = dig; 241 incr |= dig != 0; 242 s += incr; /* Increment after first non-0 digit seen. */ 243 if (dotpos != NULL) 244 /* Count skipped zeros between radix point and first non-0 245 digit. */ 246 n_zeros_skipped += 1 - incr; 247 } 248 } 249 c = (unsigned char) *++str; 250 } 251 252 str_size = s - begs; 253 254 { 255 long exp_in_base; 256 mp_size_t ra, ma, rn, mn; 257 int cnt; 258 mp_ptr mp, tp, rp; 259 mp_exp_t exp_in_limbs; 260 mp_size_t prec = PREC(x) + 1; 261 int divflag; 262 mp_size_t madj, radj; 263 264#if 0 265 size_t n_chars_needed; 266 267 /* This needs careful testing. Leave disabled for now. */ 268 /* Just consider the relevant leading digits of the mantissa. */ 269 LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base); 270 if (str_size > n_chars_needed) 271 str_size = n_chars_needed; 272#endif 273 274 if (str_size == 0) 275 { 276 SIZ(x) = 0; 277 EXP(x) = 0; 278 TMP_FREE; 279 return 0; 280 } 281 282 LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base); 283 mp = TMP_ALLOC_LIMBS (ma); 284 mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base); 285 286 madj = 0; 287 /* Ignore excess limbs in MP,MSIZE. */ 288 if (mn > prec) 289 { 290 madj = mn - prec; 291 mp += mn - prec; 292 mn = prec; 293 } 294 295 if (expptr != 0) 296 { 297 /* Scan and convert the exponent, in base exp_base. */ 298 long dig, minus, plusminus; 299 c = (unsigned char) *expptr; 300 minus = -(long) (c == '-'); 301 plusminus = minus | -(long) (c == '+'); 302 expptr -= plusminus; /* conditional increment */ 303 c = (unsigned char) *expptr++; 304 dig = digit_value[c]; 305 if (dig >= exp_base) 306 { 307 TMP_FREE; 308 return -1; 309 } 310 exp_in_base = dig; 311 c = (unsigned char) *expptr++; 312 dig = digit_value[c]; 313 while (dig < exp_base) 314 { 315 exp_in_base = exp_in_base * exp_base; 316 exp_in_base += dig; 317 c = (unsigned char) *expptr++; 318 dig = digit_value[c]; 319 } 320 exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */ 321 } 322 else 323 exp_in_base = 0; 324 if (dotpos != 0) 325 exp_in_base -= s - dotpos + n_zeros_skipped; 326 divflag = exp_in_base < 0; 327 exp_in_base = ABS (exp_in_base); 328 329 if (exp_in_base == 0) 330 { 331 MPN_COPY (PTR(x), mp, mn); 332 SIZ(x) = negative ? -mn : mn; 333 EXP(x) = mn + madj; 334 TMP_FREE; 335 return 0; 336 } 337 338 ra = 2 * (prec + 1); 339 TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra); 340 rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp); 341 342 if (divflag) 343 { 344#if 0 345 /* FIXME: Should use mpn_div_q here. */ 346 ... 347 mpn_div_q (tp, mp, mn, rp, rn, scratch); 348 ... 349#else 350 mp_ptr qp; 351 mp_limb_t qlimb; 352 if (mn < rn) 353 { 354 /* Pad out MP,MSIZE for current divrem semantics. */ 355 mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1); 356 MPN_ZERO (tmp, rn - mn); 357 MPN_COPY (tmp + rn - mn, mp, mn); 358 mp = tmp; 359 madj -= rn - mn; 360 mn = rn; 361 } 362 if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0) 363 { 364 mp_limb_t cy; 365 count_leading_zeros (cnt, rp[rn - 1]); 366 cnt -= GMP_NAIL_BITS; 367 mpn_lshift (rp, rp, rn, cnt); 368 cy = mpn_lshift (mp, mp, mn, cnt); 369 if (cy) 370 mp[mn++] = cy; 371 } 372 373 qp = TMP_ALLOC_LIMBS (prec + 1); 374 qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn); 375 tp = qp; 376 exp_in_limbs = qlimb + (mn - rn) + (madj - radj); 377 rn = prec; 378 if (qlimb != 0) 379 { 380 tp[prec] = qlimb; 381 /* Skip the least significant limb not to overrun the destination 382 variable. */ 383 tp++; 384 } 385#endif 386 } 387 else 388 { 389 tp = TMP_ALLOC_LIMBS (rn + mn); 390 if (rn > mn) 391 mpn_mul (tp, rp, rn, mp, mn); 392 else 393 mpn_mul (tp, mp, mn, rp, rn); 394 rn += mn; 395 rn -= tp[rn - 1] == 0; 396 exp_in_limbs = rn + madj + radj; 397 398 if (rn > prec) 399 { 400 tp += rn - prec; 401 rn = prec; 402 exp_in_limbs += 0; 403 } 404 } 405 406 MPN_COPY (PTR(x), tp, rn); 407 SIZ(x) = negative ? -rn : rn; 408 EXP(x) = exp_in_limbs; 409 TMP_FREE; 410 return 0; 411 } 412} 413