1################################################################################
2##
3##  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4##  Version 2.x, Copyright (C) 2001, Paul Marquess.
5##  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
6##
7##  This program is free software; you can redistribute it and/or
8##  modify it under the same terms as Perl itself.
9##
10################################################################################
11
12=provides
13
14grok_hex
15grok_oct
16grok_bin
17grok_numeric_radix
18grok_number
19__UNDEFINED__
20
21=implementation
22
23__UNDEFINED__  IN_PERL_COMPILETIME    (PL_curcop == &PL_compiling)
24__UNDEFINED__  IN_LOCALE_RUNTIME      (PL_curcop->op_private & HINT_LOCALE)
25__UNDEFINED__  IN_LOCALE_COMPILETIME  (PL_hints & HINT_LOCALE)
26__UNDEFINED__  IN_LOCALE              (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
27
28__UNDEFINED__  IS_NUMBER_IN_UV                 0x01
29__UNDEFINED__  IS_NUMBER_GREATER_THAN_UV_MAX   0x02
30__UNDEFINED__  IS_NUMBER_NOT_INT               0x04
31__UNDEFINED__  IS_NUMBER_NEG                   0x08
32__UNDEFINED__  IS_NUMBER_INFINITY              0x10
33__UNDEFINED__  IS_NUMBER_NAN                   0x20
34
35__UNDEFINED__  GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
36
37__UNDEFINED__  PERL_SCAN_GREATER_THAN_UV_MAX   0x02
38__UNDEFINED__  PERL_SCAN_SILENT_ILLDIGIT       0x04
39__UNDEFINED__  PERL_SCAN_ALLOW_UNDERSCORES     0x01
40__UNDEFINED__  PERL_SCAN_DISALLOW_PREFIX       0x02
41
42#ifndef grok_numeric_radix
43#if { NEED grok_numeric_radix }
44bool
45grok_numeric_radix(pTHX_ const char **sp, const char *send)
46{
47#ifdef USE_LOCALE_NUMERIC
48#ifdef PL_numeric_radix_sv
49    if (PL_numeric_radix_sv && IN_LOCALE) {
50        STRLEN len;
51        char* radix = SvPV(PL_numeric_radix_sv, len);
52        if (*sp + len <= send && memEQ(*sp, radix, len)) {
53            *sp += len;
54            return TRUE;
55        }
56    }
57#else
58    /* older perls don't have PL_numeric_radix_sv so the radix
59     * must manually be requested from locale.h
60     */
61#include <locale.h>
62    dTHR;  /* needed for older threaded perls */
63    struct lconv *lc = localeconv();
64    char *radix = lc->decimal_point;
65    if (radix && IN_LOCALE) {
66        STRLEN len = strlen(radix);
67        if (*sp + len <= send && memEQ(*sp, radix, len)) {
68            *sp += len;
69            return TRUE;
70        }
71    }
72#endif
73#endif /* USE_LOCALE_NUMERIC */
74    /* always try "." if numeric radix didn't match because
75     * we may have data from different locales mixed */
76    if (*sp < send && **sp == '.') {
77        ++*sp;
78        return TRUE;
79    }
80    return FALSE;
81}
82#endif
83#endif
84
85#ifndef grok_number
86#if { NEED grok_number }
87int
88grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
89{
90  const char *s = pv;
91  const char *send = pv + len;
92  const UV max_div_10 = UV_MAX / 10;
93  const char max_mod_10 = UV_MAX % 10;
94  int numtype = 0;
95  int sawinf = 0;
96  int sawnan = 0;
97
98  while (s < send && isSPACE(*s))
99    s++;
100  if (s == send) {
101    return 0;
102  } else if (*s == '-') {
103    s++;
104    numtype = IS_NUMBER_NEG;
105  }
106  else if (*s == '+')
107  s++;
108
109  if (s == send)
110    return 0;
111
112  /* next must be digit or the radix separator or beginning of infinity */
113  if (isDIGIT(*s)) {
114    /* UVs are at least 32 bits, so the first 9 decimal digits cannot
115       overflow.  */
116    UV value = *s - '0';
117    /* This construction seems to be more optimiser friendly.
118       (without it gcc does the isDIGIT test and the *s - '0' separately)
119       With it gcc on arm is managing 6 instructions (6 cycles) per digit.
120       In theory the optimiser could deduce how far to unroll the loop
121       before checking for overflow.  */
122    if (++s < send) {
123      int digit = *s - '0';
124      if (digit >= 0 && digit <= 9) {
125        value = value * 10 + digit;
126        if (++s < send) {
127          digit = *s - '0';
128          if (digit >= 0 && digit <= 9) {
129            value = value * 10 + digit;
130            if (++s < send) {
131              digit = *s - '0';
132              if (digit >= 0 && digit <= 9) {
133                value = value * 10 + digit;
134                if (++s < send) {
135                  digit = *s - '0';
136                  if (digit >= 0 && digit <= 9) {
137                    value = value * 10 + digit;
138                    if (++s < send) {
139                      digit = *s - '0';
140                      if (digit >= 0 && digit <= 9) {
141                        value = value * 10 + digit;
142                        if (++s < send) {
143                          digit = *s - '0';
144                          if (digit >= 0 && digit <= 9) {
145                            value = value * 10 + digit;
146                            if (++s < send) {
147                              digit = *s - '0';
148                              if (digit >= 0 && digit <= 9) {
149                                value = value * 10 + digit;
150                                if (++s < send) {
151                                  digit = *s - '0';
152                                  if (digit >= 0 && digit <= 9) {
153                                    value = value * 10 + digit;
154                                    if (++s < send) {
155                                      /* Now got 9 digits, so need to check
156                                         each time for overflow.  */
157                                      digit = *s - '0';
158                                      while (digit >= 0 && digit <= 9
159                                             && (value < max_div_10
160                                                 || (value == max_div_10
161                                                     && digit <= max_mod_10))) {
162                                        value = value * 10 + digit;
163                                        if (++s < send)
164                                          digit = *s - '0';
165                                        else
166                                          break;
167                                      }
168                                      if (digit >= 0 && digit <= 9
169                                          && (s < send)) {
170                                        /* value overflowed.
171                                           skip the remaining digits, don't
172                                           worry about setting *valuep.  */
173                                        do {
174                                          s++;
175                                        } while (s < send && isDIGIT(*s));
176                                        numtype |=
177                                          IS_NUMBER_GREATER_THAN_UV_MAX;
178                                        goto skip_value;
179                                      }
180                                    }
181                                  }
182                                }
183                              }
184                            }
185                          }
186                        }
187                      }
188                    }
189                  }
190                }
191              }
192            }
193          }
194        }
195      }
196    }
197    numtype |= IS_NUMBER_IN_UV;
198    if (valuep)
199      *valuep = value;
200
201  skip_value:
202    if (GROK_NUMERIC_RADIX(&s, send)) {
203      numtype |= IS_NUMBER_NOT_INT;
204      while (s < send && isDIGIT(*s))  /* optional digits after the radix */
205        s++;
206    }
207  }
208  else if (GROK_NUMERIC_RADIX(&s, send)) {
209    numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
210    /* no digits before the radix means we need digits after it */
211    if (s < send && isDIGIT(*s)) {
212      do {
213        s++;
214      } while (s < send && isDIGIT(*s));
215      if (valuep) {
216        /* integer approximation is valid - it's 0.  */
217        *valuep = 0;
218      }
219    }
220    else
221      return 0;
222  } else if (*s == 'I' || *s == 'i') {
223    s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
224    s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
225    s++; if (s < send && (*s == 'I' || *s == 'i')) {
226      s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
227      s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
228      s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
229      s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
230      s++;
231    }
232    sawinf = 1;
233  } else if (*s == 'N' || *s == 'n') {
234    /* XXX TODO: There are signaling NaNs and quiet NaNs. */
235    s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
236    s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
237    s++;
238    sawnan = 1;
239  } else
240    return 0;
241
242  if (sawinf) {
243    numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
244    numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
245  } else if (sawnan) {
246    numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
247    numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
248  } else if (s < send) {
249    /* we can have an optional exponent part */
250    if (*s == 'e' || *s == 'E') {
251      /* The only flag we keep is sign.  Blow away any "it's UV"  */
252      numtype &= IS_NUMBER_NEG;
253      numtype |= IS_NUMBER_NOT_INT;
254      s++;
255      if (s < send && (*s == '-' || *s == '+'))
256        s++;
257      if (s < send && isDIGIT(*s)) {
258        do {
259          s++;
260        } while (s < send && isDIGIT(*s));
261      }
262      else
263      return 0;
264    }
265  }
266  while (s < send && isSPACE(*s))
267    s++;
268  if (s >= send)
269    return numtype;
270  if (len == 10 && memEQ(pv, "0 but true", 10)) {
271    if (valuep)
272      *valuep = 0;
273    return IS_NUMBER_IN_UV;
274  }
275  return 0;
276}
277#endif
278#endif
279
280/*
281 * The grok_* routines have been modified to use warn() instead of
282 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
283 * which is why the stack variable has been renamed to 'xdigit'.
284 */
285
286#ifndef grok_bin
287#if { NEED grok_bin }
288UV
289grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
290{
291    const char *s = start;
292    STRLEN len = *len_p;
293    UV value = 0;
294    NV value_nv = 0;
295
296    const UV max_div_2 = UV_MAX / 2;
297    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
298    bool overflowed = FALSE;
299
300    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
301        /* strip off leading b or 0b.
302           for compatibility silently suffer "b" and "0b" as valid binary
303           numbers. */
304        if (len >= 1) {
305            if (s[0] == 'b') {
306                s++;
307                len--;
308            }
309            else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
310                s+=2;
311                len-=2;
312            }
313        }
314    }
315
316    for (; len-- && *s; s++) {
317        char bit = *s;
318        if (bit == '0' || bit == '1') {
319            /* Write it in this wonky order with a goto to attempt to get the
320               compiler to make the common case integer-only loop pretty tight.
321               With gcc seems to be much straighter code than old scan_bin.  */
322          redo:
323            if (!overflowed) {
324                if (value <= max_div_2) {
325                    value = (value << 1) | (bit - '0');
326                    continue;
327                }
328                /* Bah. We're just overflowed.  */
329                warn("Integer overflow in binary number");
330                overflowed = TRUE;
331                value_nv = (NV) value;
332            }
333            value_nv *= 2.0;
334            /* If an NV has not enough bits in its mantissa to
335             * represent a UV this summing of small low-order numbers
336             * is a waste of time (because the NV cannot preserve
337             * the low-order bits anyway): we could just remember when
338             * did we overflow and in the end just multiply value_nv by the
339             * right amount. */
340            value_nv += (NV)(bit - '0');
341            continue;
342        }
343        if (bit == '_' && len && allow_underscores && (bit = s[1])
344            && (bit == '0' || bit == '1'))
345            {
346                --len;
347                ++s;
348                goto redo;
349            }
350        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
351            warn("Illegal binary digit '%c' ignored", *s);
352        break;
353    }
354
355    if (   ( overflowed && value_nv > 4294967295.0)
356#if UVSIZE > 4
357        || (!overflowed && value > 0xffffffff  )
358#endif
359        ) {
360        warn("Binary number > 0b11111111111111111111111111111111 non-portable");
361    }
362    *len_p = s - start;
363    if (!overflowed) {
364        *flags = 0;
365        return value;
366    }
367    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
368    if (result)
369        *result = value_nv;
370    return UV_MAX;
371}
372#endif
373#endif
374
375#ifndef grok_hex
376#if { NEED grok_hex }
377UV
378grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
379{
380    const char *s = start;
381    STRLEN len = *len_p;
382    UV value = 0;
383    NV value_nv = 0;
384
385    const UV max_div_16 = UV_MAX / 16;
386    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
387    bool overflowed = FALSE;
388    const char *xdigit;
389
390    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
391        /* strip off leading x or 0x.
392           for compatibility silently suffer "x" and "0x" as valid hex numbers.
393        */
394        if (len >= 1) {
395            if (s[0] == 'x') {
396                s++;
397                len--;
398            }
399            else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
400                s+=2;
401                len-=2;
402            }
403        }
404    }
405
406    for (; len-- && *s; s++) {
407        xdigit = strchr((char *) PL_hexdigit, *s);
408        if (xdigit) {
409            /* Write it in this wonky order with a goto to attempt to get the
410               compiler to make the common case integer-only loop pretty tight.
411               With gcc seems to be much straighter code than old scan_hex.  */
412          redo:
413            if (!overflowed) {
414                if (value <= max_div_16) {
415                    value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
416                    continue;
417                }
418                warn("Integer overflow in hexadecimal number");
419                overflowed = TRUE;
420                value_nv = (NV) value;
421            }
422            value_nv *= 16.0;
423            /* If an NV has not enough bits in its mantissa to
424             * represent a UV this summing of small low-order numbers
425             * is a waste of time (because the NV cannot preserve
426             * the low-order bits anyway): we could just remember when
427             * did we overflow and in the end just multiply value_nv by the
428             * right amount of 16-tuples. */
429            value_nv += (NV)((xdigit - PL_hexdigit) & 15);
430            continue;
431        }
432        if (*s == '_' && len && allow_underscores && s[1]
433                && (xdigit = strchr((char *) PL_hexdigit, s[1])))
434            {
435                --len;
436                ++s;
437                goto redo;
438            }
439        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
440            warn("Illegal hexadecimal digit '%c' ignored", *s);
441        break;
442    }
443
444    if (   ( overflowed && value_nv > 4294967295.0)
445#if UVSIZE > 4
446        || (!overflowed && value > 0xffffffff  )
447#endif
448        ) {
449        warn("Hexadecimal number > 0xffffffff non-portable");
450    }
451    *len_p = s - start;
452    if (!overflowed) {
453        *flags = 0;
454        return value;
455    }
456    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
457    if (result)
458        *result = value_nv;
459    return UV_MAX;
460}
461#endif
462#endif
463
464#ifndef grok_oct
465#if { NEED grok_oct }
466UV
467grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
468{
469    const char *s = start;
470    STRLEN len = *len_p;
471    UV value = 0;
472    NV value_nv = 0;
473
474    const UV max_div_8 = UV_MAX / 8;
475    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
476    bool overflowed = FALSE;
477
478    for (; len-- && *s; s++) {
479         /* gcc 2.95 optimiser not smart enough to figure that this subtraction
480            out front allows slicker code.  */
481        int digit = *s - '0';
482        if (digit >= 0 && digit <= 7) {
483            /* Write it in this wonky order with a goto to attempt to get the
484               compiler to make the common case integer-only loop pretty tight.
485            */
486          redo:
487            if (!overflowed) {
488                if (value <= max_div_8) {
489                    value = (value << 3) | digit;
490                    continue;
491                }
492                /* Bah. We're just overflowed.  */
493                warn("Integer overflow in octal number");
494                overflowed = TRUE;
495                value_nv = (NV) value;
496            }
497            value_nv *= 8.0;
498            /* If an NV has not enough bits in its mantissa to
499             * represent a UV this summing of small low-order numbers
500             * is a waste of time (because the NV cannot preserve
501             * the low-order bits anyway): we could just remember when
502             * did we overflow and in the end just multiply value_nv by the
503             * right amount of 8-tuples. */
504            value_nv += (NV)digit;
505            continue;
506        }
507        if (digit == ('_' - '0') && len && allow_underscores
508            && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
509            {
510                --len;
511                ++s;
512                goto redo;
513            }
514        /* Allow \octal to work the DWIM way (that is, stop scanning
515         * as soon as non-octal characters are seen, complain only iff
516         * someone seems to want to use the digits eight and nine). */
517        if (digit == 8 || digit == 9) {
518            if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
519                warn("Illegal octal digit '%c' ignored", *s);
520        }
521        break;
522    }
523
524    if (   ( overflowed && value_nv > 4294967295.0)
525#if UVSIZE > 4
526        || (!overflowed && value > 0xffffffff  )
527#endif
528        ) {
529        warn("Octal number > 037777777777 non-portable");
530    }
531    *len_p = s - start;
532    if (!overflowed) {
533        *flags = 0;
534        return value;
535    }
536    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
537    if (result)
538        *result = value_nv;
539    return UV_MAX;
540}
541#endif
542#endif
543
544=xsinit
545
546#define NEED_grok_number
547#define NEED_grok_numeric_radix
548#define NEED_grok_bin
549#define NEED_grok_hex
550#define NEED_grok_oct
551
552=xsubs
553
554UV
555grok_number(string)
556        SV *string
557        PREINIT:
558                const char *pv;
559                STRLEN len;
560        CODE:
561                pv = SvPV(string, len);
562                if (!grok_number(pv, len, &RETVAL))
563                  XSRETURN_UNDEF;
564        OUTPUT:
565                RETVAL
566
567UV
568grok_bin(string)
569        SV *string
570        PREINIT:
571                char *pv;
572                I32 flags = 0;
573                STRLEN len;
574        CODE:
575                pv = SvPV(string, len);
576                RETVAL = grok_bin(pv, &len, &flags, NULL);
577        OUTPUT:
578                RETVAL
579
580UV
581grok_hex(string)
582        SV *string
583        PREINIT:
584                char *pv;
585                I32 flags = 0;
586                STRLEN len;
587        CODE:
588                pv = SvPV(string, len);
589                RETVAL = grok_hex(pv, &len, &flags, NULL);
590        OUTPUT:
591                RETVAL
592
593UV
594grok_oct(string)
595        SV *string
596        PREINIT:
597                char *pv;
598                I32 flags = 0;
599                STRLEN len;
600        CODE:
601                pv = SvPV(string, len);
602                RETVAL = grok_oct(pv, &len, &flags, NULL);
603        OUTPUT:
604                RETVAL
605
606UV
607Perl_grok_number(string)
608        SV *string
609        PREINIT:
610                const char *pv;
611                STRLEN len;
612        CODE:
613                pv = SvPV(string, len);
614                if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
615                  XSRETURN_UNDEF;
616        OUTPUT:
617                RETVAL
618
619UV
620Perl_grok_bin(string)
621        SV *string
622        PREINIT:
623                char *pv;
624                I32 flags = 0;
625                STRLEN len;
626        CODE:
627                pv = SvPV(string, len);
628                RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
629        OUTPUT:
630                RETVAL
631
632UV
633Perl_grok_hex(string)
634        SV *string
635        PREINIT:
636                char *pv;
637                I32 flags = 0;
638                STRLEN len;
639        CODE:
640                pv = SvPV(string, len);
641                RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
642        OUTPUT:
643                RETVAL
644
645UV
646Perl_grok_oct(string)
647        SV *string
648        PREINIT:
649                char *pv;
650                I32 flags = 0;
651                STRLEN len;
652        CODE:
653                pv = SvPV(string, len);
654                RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
655        OUTPUT:
656                RETVAL
657
658=tests plan => 10
659
660is(&Devel::PPPort::grok_number("42"), 42);
661ok(!defined(&Devel::PPPort::grok_number("A")));
662is(&Devel::PPPort::grok_bin("10000001"), 129);
663is(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
664is(&Devel::PPPort::grok_oct("377"), 255);
665
666is(&Devel::PPPort::Perl_grok_number("42"), 42);
667ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
668is(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
669is(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
670is(&Devel::PPPort::Perl_grok_oct("377"), 255);
671