The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Number::Range::Regex::Util
#
# Copyright 2012 Brian Szymanski.  All rights reserved.  This module is
# free software; you can redistribute it and/or modify it under the same
# terms as Perl itself.

package Number::Range::Regex::Util;

use strict;
use vars qw ( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION );
eval { require warnings; }; #it's ok if we can't load warnings

require Exporter;
use base 'Exporter';
@ISA    = qw( Exporter );
@EXPORT = qw ( option_mangler has_regex_overloading
               multi_union empty_set
               base_chr base_ord base_digits base_next base_prev
               _calculate_digit_range );
@EXPORT_OK = qw ( _order_by_min ) ;
%EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );

$VERSION = '0.32';

require overload;
sub has_regex_overloading {
  # http://www.gossamer-threads.com/lists/perl/porters/244314
  # http://search.cpan.org/~jesse/perl-5.12.0/pod/perl5120delta.pod#qr_overload$
  # 1.08, 1.09 are too low. 1.10: works
  # http://search.cpan.org/~jesse/perl-5.11.1/lib/overload.pm
  return defined $overload::VERSION && $overload::VERSION > '1.09';
}

sub empty_set {
  shift;
  return Number::Range::Regex::CompoundRange->new( @_ );
}

sub multi_union {
  my $opts = option_mangler( ref $_[-1] eq 'HASH' ? pop : undef );
  my $warn_overlap = delete $opts->{warn_overlap};
  my @ranges = @_;
  my $self = empty_set( $opts );
  $self = $self->union( $_, { warn_overlap => $warn_overlap } )  for  @ranges;
#  $self->{opts} = $opts;
  return $self;
}

# local options can override defaults
sub option_mangler {
  my (@passed_opts) = grep defined, @_;
  # next line is redundant but an optimization
  return $Number::Range::Regex::Range::default_opts  unless  @passed_opts;
  unshift @passed_opts, $Number::Range::Regex::Range::default_opts;
  my $opts;
  foreach my $opts_ref ( @passed_opts ) {
    die "too many arguments from ".join(":", caller())." $opts_ref" unless ref $opts_ref eq 'HASH';
    # make a copy of options hashref, add overrides
    while (my ($key, $val) = each %$opts_ref) {
      $opts->{$key} = $val;
    }
  }
  return $opts;
}

sub _order_by_min {
  my ($a, $b) = @_;
  return $a->{min} < $b->{min} ? ($a, $b) : ($b, $a);
}

sub base_digits {
  my ($base) = @_;
  return join '', map { $Number::Range::Regex::Range::STANDARD_DIGIT_ORDER[$_] } (0..$base-1);
}

sub base_next {
  my ($c, $base_digits) = @_;
  my $ord = base_ord($c, $base_digits);
  return  if  $ord+1 == length $base_digits;
  return base_chr($ord+1, $base_digits);
}

sub base_prev {
  my ($c, $base_digits) = @_;
  my $ord = base_ord($c, $base_digits);
  return  if  $ord == 0;
  return base_chr($ord-1, $base_digits);
}

#TODO: memoize base_ord, base_chr for performance?
sub base_ord {
  my ($c, $base_digits) = @_;
  return -1                    if  $c eq -1;
  return 1+length $base_digits  if  length $c > 1;
  my $ord = index $base_digits, $c;
  die "$c not found in $base_digits"  if  $ord == -1;
  return $ord;
}

sub base_chr {
  my ($n, $base_digits) = @_;
  my $chr = substr($base_digits, $n, 1);
  die "offset out of range: $n > ".length($base_digits)  if  !length $chr;
  return $chr;
}

#TODO: should _calculate_digit_range() be in Util?
# calculate the tersest possible representation of a digit range
# '1'            -> 1
# '12'           -> [12]
# '123'          -> [1-3] #preferred stylistically to [123]
# '1234'         -> [1-4]
# '0123456789'   -> \d
# '123456789abc' -> [1-9a-c]
sub _calculate_digit_range {
  my ($digit_min, $digit_max, $base_digits) = @_;
  return  unless  defined $digit_min && defined $digit_max;
  my $ord_min = base_ord( $digit_min, $base_digits );
  my $ord_max = base_ord( $digit_max, $base_digits );
  return             if  $ord_min > $ord_max;
  return $digit_min  if  $ord_min == $ord_max;
  my @range_chars;
  for(my $n=$ord_min; $n <= $ord_max; ++$n) {
    push @range_chars, base_chr( $n, $base_digits );
  }
  my $last = $range_chars[0];
  my $n = 1;
  while($n < @range_chars) {
    my $this = $range_chars[$n];
    if(1 == ord($this)-ord($last)) {
      $range_chars[$n-1] .= $this;
      splice @range_chars, $n, 1;
    } else {
      $n++;
    }
    $last = $this;
  }
  foreach my $n (0..$#range_chars) {
    my $str = $range_chars[$n];
    my $len = length $str;
    die "internal error"  if  $len == 0;
    next                  if  $len == 1; # 'a' is as terse as possible
    next                  if  $len == 2; # 'bc' is also as terse as possible
    # collapse e.g. 234567 into 2-7
    my $first = substr($str, 0, 1);
    my $last  = substr($str, -1, 1);
    $range_chars[$n] = ($first eq '0' && $last eq '9') ? '\d' : "$first-$last";
  }
  if(1==@range_chars) {
    my $ret = $range_chars[0];
    # we don't need brackets if all we have is \d or a single digit
    return $ret  if  $ret eq '\d' || length($ret)==1;
  }
  return join '', '[', @range_chars, ']';
}

1;