The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
################################################################################
##
##  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
##  Version 2.x, Copyright (C) 2001, Paul Marquess.
##  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
##
##  This program is free software; you can redistribute it and/or
##  modify it under the same terms as Perl itself.
##
################################################################################

=provides

grok_hex
grok_oct
grok_bin
grok_numeric_radix
grok_number
__UNDEFINED__

=implementation

__UNDEFINED__  IN_PERL_COMPILETIME    (PL_curcop == &PL_compiling)
__UNDEFINED__  IN_LOCALE_RUNTIME      (PL_curcop->op_private & HINT_LOCALE)
__UNDEFINED__  IN_LOCALE_COMPILETIME  (PL_hints & HINT_LOCALE)
__UNDEFINED__  IN_LOCALE              (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)

__UNDEFINED__  IS_NUMBER_IN_UV                 0x01
__UNDEFINED__  IS_NUMBER_GREATER_THAN_UV_MAX   0x02
__UNDEFINED__  IS_NUMBER_NOT_INT               0x04
__UNDEFINED__  IS_NUMBER_NEG                   0x08
__UNDEFINED__  IS_NUMBER_INFINITY              0x10
__UNDEFINED__  IS_NUMBER_NAN                   0x20

__UNDEFINED__  GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)

__UNDEFINED__  PERL_SCAN_GREATER_THAN_UV_MAX   0x02
__UNDEFINED__  PERL_SCAN_SILENT_ILLDIGIT       0x04
__UNDEFINED__  PERL_SCAN_ALLOW_UNDERSCORES     0x01
__UNDEFINED__  PERL_SCAN_DISALLOW_PREFIX       0x02

#ifndef grok_numeric_radix
#if { NEED grok_numeric_radix }
bool
grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
#ifdef USE_LOCALE_NUMERIC
#ifdef PL_numeric_radix_sv
    if (PL_numeric_radix_sv && IN_LOCALE) {
        STRLEN len;
        char* radix = SvPV(PL_numeric_radix_sv, len);
        if (*sp + len <= send && memEQ(*sp, radix, len)) {
            *sp += len;
            return TRUE;
        }
    }
#else
    /* older perls don't have PL_numeric_radix_sv so the radix
     * must manually be requested from locale.h
     */
#include <locale.h>
    dTHR;  /* needed for older threaded perls */
    struct lconv *lc = localeconv();
    char *radix = lc->decimal_point;
    if (radix && IN_LOCALE) {
        STRLEN len = strlen(radix);
        if (*sp + len <= send && memEQ(*sp, radix, len)) {
            *sp += len;
            return TRUE;
        }
    }
#endif
#endif /* USE_LOCALE_NUMERIC */
    /* always try "." if numeric radix didn't match because
     * we may have data from different locales mixed */
    if (*sp < send && **sp == '.') {
        ++*sp;
        return TRUE;
    }
    return FALSE;
}
#endif
#endif

#ifndef grok_number
#if { NEED grok_number }
int
grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
{
  const char *s = pv;
  const char *send = pv + len;
  const UV max_div_10 = UV_MAX / 10;
  const char max_mod_10 = UV_MAX % 10;
  int numtype = 0;
  int sawinf = 0;
  int sawnan = 0;

  while (s < send && isSPACE(*s))
    s++;
  if (s == send) {
    return 0;
  } else if (*s == '-') {
    s++;
    numtype = IS_NUMBER_NEG;
  }
  else if (*s == '+')
  s++;

  if (s == send)
    return 0;

  /* next must be digit or the radix separator or beginning of infinity */
  if (isDIGIT(*s)) {
    /* UVs are at least 32 bits, so the first 9 decimal digits cannot
       overflow.  */
    UV value = *s - '0';
    /* This construction seems to be more optimiser friendly.
       (without it gcc does the isDIGIT test and the *s - '0' separately)
       With it gcc on arm is managing 6 instructions (6 cycles) per digit.
       In theory the optimiser could deduce how far to unroll the loop
       before checking for overflow.  */
    if (++s < send) {
      int digit = *s - '0';
      if (digit >= 0 && digit <= 9) {
        value = value * 10 + digit;
        if (++s < send) {
          digit = *s - '0';
          if (digit >= 0 && digit <= 9) {
            value = value * 10 + digit;
            if (++s < send) {
              digit = *s - '0';
              if (digit >= 0 && digit <= 9) {
                value = value * 10 + digit;
                if (++s < send) {
                  digit = *s - '0';
                  if (digit >= 0 && digit <= 9) {
                    value = value * 10 + digit;
                    if (++s < send) {
                      digit = *s - '0';
                      if (digit >= 0 && digit <= 9) {
                        value = value * 10 + digit;
                        if (++s < send) {
                          digit = *s - '0';
                          if (digit >= 0 && digit <= 9) {
                            value = value * 10 + digit;
                            if (++s < send) {
                              digit = *s - '0';
                              if (digit >= 0 && digit <= 9) {
                                value = value * 10 + digit;
                                if (++s < send) {
                                  digit = *s - '0';
                                  if (digit >= 0 && digit <= 9) {
                                    value = value * 10 + digit;
                                    if (++s < send) {
                                      /* Now got 9 digits, so need to check
                                         each time for overflow.  */
                                      digit = *s - '0';
                                      while (digit >= 0 && digit <= 9
                                             && (value < max_div_10
                                                 || (value == max_div_10
                                                     && digit <= max_mod_10))) {
                                        value = value * 10 + digit;
                                        if (++s < send)
                                          digit = *s - '0';
                                        else
                                          break;
                                      }
                                      if (digit >= 0 && digit <= 9
                                          && (s < send)) {
                                        /* value overflowed.
                                           skip the remaining digits, don't
                                           worry about setting *valuep.  */
                                        do {
                                          s++;
                                        } while (s < send && isDIGIT(*s));
                                        numtype |=
                                          IS_NUMBER_GREATER_THAN_UV_MAX;
                                        goto skip_value;
                                      }
                                    }
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    numtype |= IS_NUMBER_IN_UV;
    if (valuep)
      *valuep = value;

  skip_value:
    if (GROK_NUMERIC_RADIX(&s, send)) {
      numtype |= IS_NUMBER_NOT_INT;
      while (s < send && isDIGIT(*s))  /* optional digits after the radix */
        s++;
    }
  }
  else if (GROK_NUMERIC_RADIX(&s, send)) {
    numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
    /* no digits before the radix means we need digits after it */
    if (s < send && isDIGIT(*s)) {
      do {
        s++;
      } while (s < send && isDIGIT(*s));
      if (valuep) {
        /* integer approximation is valid - it's 0.  */
        *valuep = 0;
      }
    }
    else
      return 0;
  } else if (*s == 'I' || *s == 'i') {
    s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
    s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
    s++; if (s < send && (*s == 'I' || *s == 'i')) {
      s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
      s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
      s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
      s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
      s++;
    }
    sawinf = 1;
  } else if (*s == 'N' || *s == 'n') {
    /* XXX TODO: There are signaling NaNs and quiet NaNs. */
    s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
    s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
    s++;
    sawnan = 1;
  } else
    return 0;

  if (sawinf) {
    numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
    numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
  } else if (sawnan) {
    numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
    numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
  } else if (s < send) {
    /* we can have an optional exponent part */
    if (*s == 'e' || *s == 'E') {
      /* The only flag we keep is sign.  Blow away any "it's UV"  */
      numtype &= IS_NUMBER_NEG;
      numtype |= IS_NUMBER_NOT_INT;
      s++;
      if (s < send && (*s == '-' || *s == '+'))
        s++;
      if (s < send && isDIGIT(*s)) {
        do {
          s++;
        } while (s < send && isDIGIT(*s));
      }
      else
      return 0;
    }
  }
  while (s < send && isSPACE(*s))
    s++;
  if (s >= send)
    return numtype;
  if (len == 10 && memEQ(pv, "0 but true", 10)) {
    if (valuep)
      *valuep = 0;
    return IS_NUMBER_IN_UV;
  }
  return 0;
}
#endif
#endif

/*
 * The grok_* routines have been modified to use warn() instead of
 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
 * which is why the stack variable has been renamed to 'xdigit'.
 */

#ifndef grok_bin
#if { NEED grok_bin }
UV
grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_2 = UV_MAX / 2;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
        /* strip off leading b or 0b.
           for compatibility silently suffer "b" and "0b" as valid binary
           numbers. */
        if (len >= 1) {
            if (s[0] == 'b') {
                s++;
                len--;
            }
            else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
                s+=2;
                len-=2;
            }
        }
    }

    for (; len-- && *s; s++) {
        char bit = *s;
        if (bit == '0' || bit == '1') {
            /* Write it in this wonky order with a goto to attempt to get the
               compiler to make the common case integer-only loop pretty tight.
               With gcc seems to be much straighter code than old scan_bin.  */
          redo:
            if (!overflowed) {
                if (value <= max_div_2) {
                    value = (value << 1) | (bit - '0');
                    continue;
                }
                /* Bah. We're just overflowed.  */
                warn("Integer overflow in binary number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 2.0;
            /* If an NV has not enough bits in its mantissa to
             * represent a UV this summing of small low-order numbers
             * is a waste of time (because the NV cannot preserve
             * the low-order bits anyway): we could just remember when
             * did we overflow and in the end just multiply value_nv by the
             * right amount. */
            value_nv += (NV)(bit - '0');
            continue;
        }
        if (bit == '_' && len && allow_underscores && (bit = s[1])
            && (bit == '0' || bit == '1'))
            {
                --len;
                ++s;
                goto redo;
            }
        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
            warn("Illegal binary digit '%c' ignored", *s);
        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Binary number > 0b11111111111111111111111111111111 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_hex
#if { NEED grok_hex }
UV
grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_16 = UV_MAX / 16;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;
    const char *xdigit;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
        /* strip off leading x or 0x.
           for compatibility silently suffer "x" and "0x" as valid hex numbers.
        */
        if (len >= 1) {
            if (s[0] == 'x') {
                s++;
                len--;
            }
            else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
                s+=2;
                len-=2;
            }
        }
    }

    for (; len-- && *s; s++) {
        xdigit = strchr((char *) PL_hexdigit, *s);
        if (xdigit) {
            /* Write it in this wonky order with a goto to attempt to get the
               compiler to make the common case integer-only loop pretty tight.
               With gcc seems to be much straighter code than old scan_hex.  */
          redo:
            if (!overflowed) {
                if (value <= max_div_16) {
                    value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
                    continue;
                }
                warn("Integer overflow in hexadecimal number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 16.0;
            /* If an NV has not enough bits in its mantissa to
             * represent a UV this summing of small low-order numbers
             * is a waste of time (because the NV cannot preserve
             * the low-order bits anyway): we could just remember when
             * did we overflow and in the end just multiply value_nv by the
             * right amount of 16-tuples. */
            value_nv += (NV)((xdigit - PL_hexdigit) & 15);
            continue;
        }
        if (*s == '_' && len && allow_underscores && s[1]
                && (xdigit = strchr((char *) PL_hexdigit, s[1])))
            {
                --len;
                ++s;
                goto redo;
            }
        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
            warn("Illegal hexadecimal digit '%c' ignored", *s);
        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Hexadecimal number > 0xffffffff non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_oct
#if { NEED grok_oct }
UV
grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_8 = UV_MAX / 8;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    for (; len-- && *s; s++) {
         /* gcc 2.95 optimiser not smart enough to figure that this subtraction
            out front allows slicker code.  */
        int digit = *s - '0';
        if (digit >= 0 && digit <= 7) {
            /* Write it in this wonky order with a goto to attempt to get the
               compiler to make the common case integer-only loop pretty tight.
            */
          redo:
            if (!overflowed) {
                if (value <= max_div_8) {
                    value = (value << 3) | digit;
                    continue;
                }
                /* Bah. We're just overflowed.  */
                warn("Integer overflow in octal number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 8.0;
            /* If an NV has not enough bits in its mantissa to
             * represent a UV this summing of small low-order numbers
             * is a waste of time (because the NV cannot preserve
             * the low-order bits anyway): we could just remember when
             * did we overflow and in the end just multiply value_nv by the
             * right amount of 8-tuples. */
            value_nv += (NV)digit;
            continue;
        }
        if (digit == ('_' - '0') && len && allow_underscores
            && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
            {
                --len;
                ++s;
                goto redo;
            }
        /* Allow \octal to work the DWIM way (that is, stop scanning
         * as soon as non-octal characters are seen, complain only iff
         * someone seems to want to use the digits eight and nine). */
        if (digit == 8 || digit == 9) {
            if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
                warn("Illegal octal digit '%c' ignored", *s);
        }
        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Octal number > 037777777777 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

=xsinit

#define NEED_grok_number
#define NEED_grok_numeric_radix
#define NEED_grok_bin
#define NEED_grok_hex
#define NEED_grok_oct

=xsubs

UV
grok_number(string)
        SV *string
        PREINIT:
                const char *pv;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                if (!grok_number(pv, len, &RETVAL))
                  XSRETURN_UNDEF;
        OUTPUT:
                RETVAL

UV
grok_bin(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = grok_bin(pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

UV
grok_hex(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = grok_hex(pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

UV
grok_oct(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = grok_oct(pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

UV
Perl_grok_number(string)
        SV *string
        PREINIT:
                const char *pv;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
                  XSRETURN_UNDEF;
        OUTPUT:
                RETVAL

UV
Perl_grok_bin(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

UV
Perl_grok_hex(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

UV
Perl_grok_oct(string)
        SV *string
        PREINIT:
                char *pv;
                I32 flags = 0;
                STRLEN len;
        CODE:
                pv = SvPV(string, len);
                RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
        OUTPUT:
                RETVAL

=tests plan => 10

ok(&Devel::PPPort::grok_number("42"), 42);
ok(!defined(&Devel::PPPort::grok_number("A")));
ok(&Devel::PPPort::grok_bin("10000001"), 129);
ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
ok(&Devel::PPPort::grok_oct("377"), 255);

ok(&Devel::PPPort::Perl_grok_number("42"), 42);
ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
ok(&Devel::PPPort::Perl_grok_oct("377"), 255);