cpansmokebox/inc/bundle/Regexp/Assemble.pm

# Regexp::Assemple.pm
#
# Copyright (c) 2004-2008 David Landgren
# All rights reserved

package Regexp::Assemble;

use vars qw/$VERSION $have_Storable $Current_Lexer $Default_Lexer $Single_Char $Always_Fail/;
$VERSION = '0.34';

use strict;

use constant DEBUG_ADD  => 1;
use constant DEBUG_TAIL => 2;
use constant DEBUG_LEX  => 4;
use constant DEBUG_TIME => 8;

# The following patterns were generated with eg/naive
$Default_Lexer = qr/(?![[(\\]).(?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?|\\(?:[bABCEGLQUXZ]|[lu].|(?:[^\w]|[aefnrtdDwWsS]|c.|0\d{2}|x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}})|N\{\w+\}|[Pp](?:\{\w+\}|.))(?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?)|\[.*?(?<!\\)\](?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?|\(.*?(?<!\\)\)(?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?/; # ]) restore equilibrium

$Single_Char   = qr/^(?:\\(?:[aefnrtdDwWsS]|c.|[^\w\/{|}-]|0\d{2}|x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}}))|[^\$^])$/;

# the pattern to return when nothing has been added (and thus not match anything)
$Always_Fail = "^\\b\0";

sub new {
    my $class = shift;
    my %args  = @_;

    my $anc;
    for $anc (qw(word line string)) {
        if (exists $args{"anchor_$anc"}) {
            my $val = delete $args{"anchor_$anc"};
            for my $anchor ("anchor_${anc}_begin", "anchor_${anc}_end") {
                $args{$anchor} = $val unless exists $args{$anchor};
            }
        }
    }

    # anchor_string_absolute sets anchor_string_begin and anchor_string_end_absolute
    if (exists $args{anchor_string_absolute}) {
        my $val = delete $args{anchor_string_absolute};
        for my $anchor (qw(anchor_string_begin anchor_string_end_absolute)) {
            $args{$anchor} = $val unless exists $args{$anchor};
        }
    }

    exists $args{$_} or $args{$_} = 0 for qw(
        anchor_word_begin
        anchor_word_end
        anchor_line_begin
        anchor_line_end
        anchor_string_begin
        anchor_string_end
        anchor_string_end_absolute
        debug
        dup_warn
        indent
        lookahead
        mutable
        track
        unroll_plus
    );

    exists $args{$_} or $args{$_} = 1 for qw(
        fold_meta_pairs
        reduce
        chomp
    );

    @args{qw(re str path)} = (undef, undef, []);

    $args{flags} ||= delete $args{modifiers} || '';
    $args{lex}     = $Current_Lexer if defined $Current_Lexer;

    my $self = bless \%args, $class;

    if ($self->_debug(DEBUG_TIME)) {
        $self->_init_time_func();
        $self->{_begin_time} = $self->{_time_func}->();
    }
    $self->{input_record_separator} = delete $self->{rs}
        if exists $self->{rs};
    exists $self->{file} and $self->add_file($self->{file});

    return $self;
}

sub _init_time_func {
    my $self = shift;
    return if exists $self->{_time_func};

    # attempt to improve accuracy
    if (!defined($self->{_use_time_hires})) {
        eval {require Time::HiRes};
        $self->{_use_time_hires} = $@;
    }
    $self->{_time_func} = length($self->{_use_time_hires}) > 0
        ? sub { time }
        : \&Time::HiRes::time
    ;
}

sub clone {
    my $self = shift;
    my $clone;
    my @attr = grep {$_ ne 'path'} keys %$self;
    @{$clone}{@attr} = @{$self}{@attr};
    $clone->{path}   = _path_clone($self->_path);
    bless $clone, ref($self);
}

sub _fastlex {
    my $self   = shift;
    my $record = shift;
    my $len    = 0;
    my @path   = ();
    my $case   = '';
    my $qm     = '';

    my $debug       = $self->{debug} & DEBUG_LEX;
    my $unroll_plus = $self->{unroll_plus};

    my $token;
    my $qualifier;
    $debug and print "# _lex <$record>\n";
    my $modifier        = q{(?:[*+?]\\??|\\{(?:\\d+(?:,\d*)?|,\d+)\\}\\??)?};
    my $class_matcher   = qr/\[(?:\[:[a-z]+:\]|\\?.)*?\]/;
    my $paren_matcher   = qr/\(.*?(?<!\\)\)$modifier/;
    my $misc_matcher    = qr/(?:(c)(.)|(0)(\d{2}))($modifier)/;
    my $regular_matcher = qr/([^\\[(])($modifier)/;
    my $qm_matcher      = qr/(\\?.)/;

    my $matcher = $regular_matcher;
    {
        if ($record =~ /\G$matcher/gc) {
            # neither a \\ nor [ nor ( followed by a modifer
            if ($1 eq '\\E') {
                $debug and print "#   E\n";
                $case = $qm = '';
                $matcher = $regular_matcher;
                redo;
            }
            elsif ($qm and ($1 eq '\\L' or $1 eq '\\U')) {
                $debug and print "#  ignore \\L, \\U\n";
                redo;
            }
            $token = $1;
            $qualifier = defined $2 ? $2 : '';
            $debug and print "#  token <$token> <$qualifier>\n";
            if ($qm) {
                $token = quotemeta($token);
                $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{}\/])$/$1/;
            }
            else {
                $token =~ s{\A([][{}*+?@\\/])\Z}{\\$1};
            }
            if ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/) {
                $1 and $qualifier .= $1;
                $debug and print " unroll <$token><$token><$qualifier>\n";
                $case and $token = $case eq 'L' ? lc($token) : uc($token);
                push @path, $token, "$token$qualifier";
            }
            else {
                $debug and print " clean <$token>\n";
                push @path,
                      $case eq 'L' ? lc($token).$qualifier
                    : $case eq 'U' ? uc($token).$qualifier
                    :                   $token.$qualifier
                    ;
            }
            redo;
        }

        elsif ($record =~ /\G\\/gc) {
            $debug and print "#  backslash\n";
            # backslash
            if ($record =~ /\G([sdwSDW])($modifier)/gc) {
                ($token, $qualifier) = ($1, $2);
                $debug and print "#   meta <$token> <$qualifier>\n";
                push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)
                    ? ("\\$token", "\\$token$qualifier" . (defined $1 ? $1 : ''))
                    : "\\$token$qualifier";
            }
            elsif ($record =~ /\Gx([\da-fA-F]{2})($modifier)/gc) {
                $debug and print "#   x $1\n";
                $token = quotemeta(chr(hex($1)));
                $qualifier = $2;
                $debug and print "#  cooked <$token>\n";
                $token =~ s/^\\([^\w$()*+.?\[\\\]^|{\/])$/$1/; # } balance
                $debug and print "#   giving <$token>\n";
                push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)
                    ? ($token, "$token$qualifier" . (defined $1 ? $1 : ''))
                    : "$token$qualifier";
            }
            elsif ($record =~ /\GQ/gc) {
                $debug and print "#   Q\n";
                $qm = 1;
                $matcher = $qm_matcher;
            }
            elsif ($record =~ /\G([LU])/gc) {
                $debug and print "#   case $1\n";
                $case = $1;
            }
            elsif ($record =~ /\GE/gc) {
                $debug and print "#   E\n";
                $case = $qm = '';
                $matcher = $regular_matcher;
            }
            elsif ($record =~ /\G([lu])(.)/gc) {
                $debug and print "#   case $1 to <$2>\n";
                push @path, $1 eq 'l' ? lc($2) : uc($2);
            }
            elsif (my @arg = grep {defined} $record =~ /\G$misc_matcher/gc) {
                if ($] < 5.007) {
                    my $len = 0;
                    $len += length($_) for @arg;
                    $debug and print "#  pos ", pos($record), " fixup add $len\n";
                    pos($record) = pos($record) + $len;
                }
                my $directive = shift @arg;
                if ($directive eq 'c') {
                    $debug and print "#  ctrl <@arg>\n";
                    push @path, "\\c" . uc(shift @arg);
                }
                else { # elsif ($directive eq '0') {
                    $debug and print "#  octal <@arg>\n";
                    my $ascii = oct(shift @arg);
                    push @path, ($ascii < 32)
                        ? "\\c" . chr($ascii+64)
                        : chr($ascii)
                    ;
                }
                $path[-1] .= join( '', @arg ); # if @arg;
                redo;
            }
            elsif ($record =~ /\G(.)/gc) {
                $token = $1;
                $token =~ s{[AZabefnrtz\[\]{}()\\\$*+.?@|/^]}{\\$token};
                $debug and print "#   meta <$token>\n";
                push @path, $token;
            }
            else {
                $debug and print "#   ignore char at ", pos($record), " of <$record>\n";
            }
            redo;
        }

        elsif ($record =~ /\G($class_matcher)($modifier)/gc) {
            # [class] followed by a modifer
            my $class     = $1;
            my $qualifier = defined $2 ? $2 : '';
            $debug and print "#  class begin <$class> <$qualifier>\n";
            if ($class =~ /\A\[\\?(.)]\Z/) {
                $class = quotemeta $1;
                $class =~ s{\A\\([!@%])\Z}{$1};
                $debug and print "#  class unwrap $class\n";
            }
            $debug and print "#  class end <$class> <$qualifier>\n";
            push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)
                ? ($class, "$class$qualifier" . (defined $1 ? $1 : ''))
                : "$class$qualifier";
            redo;
        }

        elsif ($record =~ /\G($paren_matcher)/gc) {
            $debug and print "#  paren <$1>\n";
            # (paren) followed by a modifer
            push @path, $1;
            redo;
        }

    }
    return \@path;
}

sub _lex {
    my $self   = shift;
    my $record = shift;
    my $len    = 0;
    my @path   = ();
    my $case   = '';
    my $qm     = '';
    my $re     = defined $self->{lex} ? $self->{lex}
        : defined $Current_Lexer ? $Current_Lexer
        : $Default_Lexer;
    my $debug  = $self->{debug} & DEBUG_LEX;
    $debug and print "# _lex <$record>\n";
    my ($token, $next_token, $diff, $token_len);
    while( $record =~ /($re)/g ) {
        $token = $1;
        $token_len = length($token);
        $debug and print "# lexed <$token> len=$token_len\n";
        if( pos($record) - $len > $token_len ) {
            $next_token = $token;
            $token = substr( $record, $len, $diff = pos($record) - $len - $token_len );
            $debug and print "#  recover <", substr( $record, $len, $diff ), "> as <$token>, save <$next_token>\n";
            $len += $diff;
        }
        $len += $token_len;
        TOKEN: {
            if( substr( $token, 0, 1 ) eq '\\' ) {
                if( $token =~ /^\\([ELQU])$/ ) {
                    if( $1 eq 'E' ) {
                        $qm and $re = defined $self->{lex} ? $self->{lex}
                            : defined $Current_Lexer ? $Current_Lexer
                            : $Default_Lexer;
                        $case = $qm = '';
                    }
                    elsif( $1 eq 'Q' ) {
                        $qm = $1;
                        # switch to a more precise lexer to quotemeta individual characters
                        $re = qr/\\?./;
                    }
                    else {
                        $case = $1;
                    }
                    $debug and print "#  state change qm=<$qm> case=<$case>\n";
                    goto NEXT_TOKEN;
                }
                elsif( $token =~ /^\\([lu])(.)$/ ) {
                    $debug and print "#  apply case=<$1> to <$2>\n";
                    push @path, $1 eq 'l' ? lc($2) : uc($2);
                    goto NEXT_TOKEN;
                }
                elsif( $token =~ /^\\x([\da-fA-F]{2})$/ ) {
                    $token = quotemeta(chr(hex($1)));
                    $debug and print "#  cooked <$token>\n";
                    $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{\/])$/$1/; # } balance
                    $debug and print "#   giving <$token>\n";
                }
                else {
                    $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{\/])$/$1/; # } balance
                    $debug and print "#  backslashed <$token>\n";
                }
            }
            else {
                $case and $token = $case eq 'U' ? uc($token) : lc($token);
                $qm   and $token = quotemeta($token);
                $token = '\\/' if $token eq '/';
            }
            # undo quotemeta's brute-force escapades
            $qm and $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{}\/])$/$1/;
            $debug and print "#   <$token> case=<$case> qm=<$qm>\n";
            push @path, $token;

            NEXT_TOKEN:
            if( defined $next_token ) {
                $debug and print "#   redo <$next_token>\n";
                $token = $next_token;
                $next_token = undef;
                redo TOKEN;
            }
        }
    }
    if( $len < length($record) ) {
        # NB: the remainder only arises in the case of degenerate lexer,
        # and if \Q is operative, the lexer will have been switched to
        # /\\?./, which means there can never be a remainder, so we
        # don't have to bother about quotemeta. In other words:
        # $qm will never be true in this block.
        my $remain = substr($record,$len); 
        $case and $remain = $case eq 'U' ? uc($remain) : lc($remain);
        $debug and print "#   add remaining <$remain> case=<$case> qm=<$qm>\n";
        push @path, $remain;
    }
    $debug and print "# _lex out <@path>\n";
    return \@path;
}

sub add {
    my $self = shift;
    my $record;
    my $debug  = $self->{debug} & DEBUG_LEX;
    while( defined( $record = shift @_ )) {
        CORE::chomp($record) if $self->{chomp};
        next if $self->{pre_filter} and not $self->{pre_filter}->($record);
        $debug and print "# add <$record>\n";
        $self->{stats_raw} += length $record;
        my $list = $record =~ /[+*?(\\\[{]/ # }]) restore equilibrium
            ? $self->{lex} ? $self->_lex($record) : $self->_fastlex($record)
            : [split //, $record]
        ;
        next if $self->{filter} and not $self->{filter}->(@$list);
        $self->_insertr( $list );
    }
    return $self;
}

sub add_file {
    my $self = shift;
    my $rs;
    my @file;
    if (ref($_[0]) eq 'HASH') {
        my $arg = shift;
        $rs = $arg->{rs}
            || $arg->{input_record_separator}
            || $self->{input_record_separator}
            || $/;
        @file = ref($arg->{file}) eq 'ARRAY'
            ? @{$arg->{file}}
            : $arg->{file};
    }
    else {
        $rs   = $self->{input_record_separator} || $/;
        @file = @_;
    }
    local $/ = $rs;
    my $file;
    for $file (@file) {
        open my $fh, '<', $file or do {
            require Carp;
            Carp::croak("cannot open $file for input: $!");
        };
        while (defined (my $rec = <$fh>)) {
            $self->add($rec);
        }
        close $fh;
    }
    return $self;
}

sub insert {
    my $self = shift;
    return if $self->{filter} and not $self->{filter}->(@_);
    $self->_insertr( [@_] );
    return $self;
}

sub _insertr {
    my $self   = shift;
    my $dup    = $self->{stats_dup} || 0;
    $self->{path} = $self->_insert_path( $self->_path, $self->_debug(DEBUG_ADD), $_[0] );
    if( not defined $self->{stats_dup} or $dup == $self->{stats_dup} ) {
        ++$self->{stats_add};
        $self->{stats_cooked} += defined($_) ? length($_) : 0 for @{$_[0]};
    }
    elsif( $self->{dup_warn} ) {
        if( ref $self->{dup_warn} eq 'CODE' ) {
            $self->{dup_warn}->($self, $_[0]); 
        }
        else {
            my $pattern = join( '', @{$_[0]} );
            require Carp;
            Carp::carp("duplicate pattern added: /$pattern/");
        }
    }
    $self->{str} = $self->{re} = undef;
}

sub lexstr {
    return shift->_lex(shift);
}

sub pre_filter {
    my $self   = shift;
    my $pre_filter = shift;
    if( defined $pre_filter and ref($pre_filter) ne 'CODE' ) {
        require Carp;
        Carp::croak("pre_filter method not passed a coderef");
    }
    $self->{pre_filter} = $pre_filter;
    return $self;
}


sub filter {
    my $self   = shift;
    my $filter = shift;
    if( defined $filter and ref($filter) ne 'CODE' ) {
        require Carp;
        Carp::croak("filter method not passed a coderef");
    }
    $self->{filter} = $filter;
    return $self;
}

sub as_string {
    my $self = shift;
    if( not defined $self->{str} ) {
        if( $self->{track} ) {
            $self->{m}      = undef;
            $self->{mcount} = 0;
            $self->{mlist}  = [];
            $self->{str}    = _re_path_track($self, $self->_path, '', '');
        }
        else {
            $self->_reduce unless ($self->{mutable} or not $self->{reduce});
            my $arg  = {@_};
            $arg->{indent} = $self->{indent}
                if not exists $arg->{indent} and $self->{indent} > 0;
            if( exists $arg->{indent} and $arg->{indent} > 0 ) {
                $arg->{depth} = 0;
                $self->{str}  = _re_path_pretty($self, $self->_path, $arg);
            }
            elsif( $self->{lookahead} ) {
                $self->{str}  = _re_path_lookahead($self, $self->_path);
            }
            else {
                $self->{str}  = _re_path($self, $self->_path);
            }
        }
        if (not length $self->{str}) {
            # explicitly fail to match anything if no pattern was generated
            $self->{str} = $Always_Fail;
        }
        else {
            my $begin = 
                  $self->{anchor_word_begin}   ? '\\b'
                : $self->{anchor_line_begin}   ? '^'
                : $self->{anchor_string_begin} ? '\A'
                : ''
            ;
            my $end = 
                  $self->{anchor_word_end}            ? '\\b'
                : $self->{anchor_line_end}            ? '$'
                : $self->{anchor_string_end}          ? '\Z'
                : $self->{anchor_string_end_absolute} ? '\z'
                : ''
            ;
            $self->{str} = "$begin$self->{str}$end";
        }
        $self->{path} = [] unless $self->{mutable};
    }
    return $self->{str};
}

sub re {
    my $self = shift;
    $self->_build_re($self->as_string(@_)) unless defined $self->{re};
    return $self->{re};
}

use overload '""' => sub {
    my $self = shift;
    return $self->{re} if $self->{re};
    $self->_build_re($self->as_string());
    return $self->{re};
};

sub _build_re {
    my $self  = shift;
    my $str   = shift;
    if( $self->{track} ) {
        use re 'eval';
        $self->{re} = length $self->{flags}
            ? qr/(?$self->{flags}:$str)/
            : qr/$str/
        ;
    }
    else {
        # how could I not repeat myself?
        $self->{re} = length $self->{flags}
            ? qr/(?$self->{flags}:$str)/
            : qr/$str/
        ;
    }
}

sub match {
    my $self = shift;
    my $target = shift;
    $self->_build_re($self->as_string(@_)) unless defined $self->{re};
    $self->{m}    = undef;
    $self->{mvar} = [];
    if( not $target =~ /$self->{re}/ ) {
        $self->{mbegin} = [];
        $self->{mend}   = [];
        return undef;
    }
    $self->{m}      = $^R if $] >= 5.009005;
    $self->{mbegin} = _path_copy([@-]);
    $self->{mend}   = _path_copy([@+]);
    my $n = 0;
    for( my $n = 0; $n < @-; ++$n ) {
        push @{$self->{mvar}}, substr($target, $-[$n], $+[$n] - $-[$n])
            if defined $-[$n] and defined $+[$n];
    }
    if( $self->{track} ) {
        return defined $self->{m} ? $self->{mlist}[$self->{m}] : 1;
    }
    else {
        return 1;
    }
}

sub source {
    my $self = shift;
    return unless $self->{track};
    defined($_[0]) and return $self->{mlist}[$_[0]];
    return unless defined $self->{m};
    return $self->{mlist}[$self->{m}];
}

sub mbegin {
    my $self = shift;
    return exists $self->{mbegin} ? $self->{mbegin} : [];
}

sub mend {
    my $self = shift;
    return exists $self->{mend} ? $self->{mend} : [];
}

sub mvar {
    my $self = shift;
    return undef unless exists $self->{mvar};
    return defined($_[0]) ? $self->{mvar}[$_[0]] : $self->{mvar};
}

sub capture {
    my $self = shift;
    if( $self->{mvar} ) {
        my @capture = @{$self->{mvar}};
        shift @capture;
        return @capture;
    }
    return ();
}

sub matched {
    my $self = shift;
    return defined $self->{m} ? $self->{mlist}[$self->{m}] : undef;
}

sub stats_add {
    my $self = shift;
    return $self->{stats_add} || 0;
}

sub stats_dup {
    my $self = shift;
    return $self->{stats_dup} || 0;
}

sub stats_raw {
    my $self = shift;
    return $self->{stats_raw} || 0;
}

sub stats_cooked {
    my $self = shift;
    return $self->{stats_cooked} || 0;
}

sub stats_length {
    my $self = shift;
    return (defined $self->{str} and $self->{str} ne $Always_Fail) ? length $self->{str} : 0;
}

sub dup_warn {
    my $self = shift;
    $self->{dup_warn} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_word_begin {
    my $self = shift;
    $self->{anchor_word_begin} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_word_end {
    my $self = shift;
    $self->{anchor_word_end} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_word {
    my $self  = shift;
    my $state = shift;
    $self->anchor_word_begin($state)->anchor_word_end($state);
    return $self;
}

sub anchor_line_begin {
    my $self = shift;
    $self->{anchor_line_begin} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_line_end {
    my $self = shift;
    $self->{anchor_line_end} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_line {
    my $self  = shift;
    my $state = shift;
    $self->anchor_line_begin($state)->anchor_line_end($state);
    return $self;
}

sub anchor_string_begin {
    my $self = shift;
    $self->{anchor_string_begin} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_string_end {
    my $self = shift;
    $self->{anchor_string_end} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_string_end_absolute {
    my $self = shift;
    $self->{anchor_string_end_absolute} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub anchor_string {
    my $self  = shift;
    my $state = defined($_[0]) ? $_[0] : 1;
    $self->anchor_string_begin($state)->anchor_string_end($state);
    return $self;
}

sub anchor_string_absolute {
    my $self  = shift;
    my $state = defined($_[0]) ? $_[0] : 1;
    $self->anchor_string_begin($state)->anchor_string_end_absolute($state);
    return $self;
}

sub debug {
    my $self = shift;
    $self->{debug} = defined($_[0]) ? $_[0] : 0;
    if ($self->_debug(DEBUG_TIME)) {
        # hmm, debugging time was switched on after instantiation
        $self->_init_time_func;
        $self->{_begin_time} = $self->{_time_func}->();
    }
    return $self;
}

sub dump {
    return _dump($_[0]->_path);
}

sub chomp {
    my $self = shift;
    $self->{chomp} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub fold_meta_pairs {
    my $self = shift;
    $self->{fold_meta_pairs} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub indent {
    my $self = shift;
    $self->{indent} = defined($_[0]) ? $_[0] : 0;
    return $self;
}

sub lookahead {
    my $self = shift;
    $self->{lookahead} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub flags {
    my $self = shift;
    $self->{flags} = defined($_[0]) ? $_[0] : '';
    return $self;
}

sub modifiers {
    my $self = shift;
    return $self->flags(@_);
}

sub track {
    my $self = shift;
    $self->{track} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub unroll_plus {
    my $self = shift;
    $self->{unroll_plus} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub lex {
    my $self = shift;
    $self->{lex} = qr($_[0]);
    return $self;
}

sub reduce {
    my $self = shift;
    $self->{reduce} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub mutable {
    my $self = shift;
    $self->{mutable} = defined($_[0]) ? $_[0] : 1;
    return $self;
}

sub reset {
    # reinitialise the internal state of the object
    my $self = shift;
    $self->{path} = [];
    $self->{re}   = undef;
    $self->{str}  = undef;
    return $self;
}

sub Default_Lexer {
    if( $_[0] ) {
        if( my $refname = ref($_[0]) ) {
            require Carp;
            Carp::croak("Cannot pass a $refname to Default_Lexer");
        }
        $Current_Lexer = $_[0];
    }
    return defined $Current_Lexer ? $Current_Lexer : $Default_Lexer;
}

# --- no user serviceable parts below ---

# -- debug helpers

sub _debug {
    my $self = shift;
    return $self->{debug} & shift() ? 1 : 0;
}

# -- helpers

sub _path {
    # access the path
    return $_[0]->{path};
}

# -- the heart of the matter

$have_Storable = do {
    eval {
        require Storable;
        import Storable 'dclone';
    };
    $@ ? 0 : 1;
};

sub _path_clone {
    $have_Storable ? dclone($_[0]) : _path_copy($_[0]);
}

sub _path_copy {
    my $path = shift;
    my $new  = [];
    for( my $p = 0; $p < @$path; ++$p ) {
        if( ref($path->[$p]) eq 'HASH' ) {
            push @$new, _node_copy($path->[$p]);
        }
        elsif( ref($path->[$p]) eq 'ARRAY' ) {
            push @$new, _path_copy($path->[$p]);
        }
        else {
            push @$new, $path->[$p];
        }
    }
    return $new;
}

sub _node_copy {
    my $node = shift;
    my $new  = {};
    while( my( $k, $v ) = each %$node ) {
        $new->{$k} = defined($v)
            ? _path_copy($v)
            : undef
        ;
    }
    return $new;
}

sub _insert_path {
    my $self  = shift;
    my $list  = shift;
    my $debug = shift;
    my @in    = @{shift()}; # create a new copy
    if( @$list == 0 ) { # special case the first time
        if( @in == 0 or (@in == 1 and (not defined $in[0] or $in[0] eq ''))) {
            return [{'' => undef}];
        }
        else {
            return \@in;
        }
    }
    $debug and print "# _insert_path @{[_dump(\@in)]} into @{[_dump($list)]}\n";
    my $path   = $list;
    my $offset = 0;
    my $token;
    if( not @in ) {
        if( ref($list->[0]) ne 'HASH' ) {
            return [ { '' => undef, $list->[0] => $list } ];
        }
        else {
            $list->[0]{''} = undef;
            return $list;
        }
    }
    while( defined( $token = shift @in )) {
        if( ref($token) eq 'HASH' ) {
            $debug and print "#  p0=", _dump($path), "\n";
            $path = $self->_insert_node( $path, $offset, $token, $debug, @in );
            $debug and print "#  p1=", _dump($path), "\n";
            last;
        }
        if( ref($path->[$offset]) eq 'HASH' ) {
            $debug and print "#   at (off=$offset len=@{[scalar @$path]}) ", _dump($path->[$offset]), "\n";
            my $node = $path->[$offset];
            if( exists( $node->{$token} )) {
                if ($offset < $#$path) {
                    my $new = {
                        $token => [$token, @in],
                        _re_path($self, [$node]) => [@{$path}[$offset..$#$path]],
                    };
                    splice @$path, $offset, @$path-$offset, $new;
                    last;
                }
                else {
                    $debug and print "#   descend key=$token @{[_dump($node->{$token})]}\n";
                    $path   = $node->{$token};
                    $offset = 0;
                    redo;
                }
            }
            else {
                $debug and print "#   add path ($token:@{[_dump(\@in)]}) into @{[_dump($path)]} at off=$offset to end=@{[scalar $#$path]}\n";
                if( $offset == $#$path ) {
                    $node->{$token} = [ $token, @in ];
                }
                else {
                    my $new = {
                        _node_key($token) => [ $token, @in ],
                        _node_key($node)  => [@{$path}[$offset..$#{$path}]],
                    };
                    splice( @$path, $offset, @$path - $offset, $new );
                    $debug and print "#   fused node=@{[_dump($new)]} path=@{[_dump($path)]}\n";
                }
                last;
            }
        }

        if( $debug ) {
            my $msg = '';
            my $n;
            for( $n = 0; $n < @$path; ++$n ) {
                $msg .= ' ' if $n;
                my $atom = ref($path->[$n]) eq 'HASH'
                    ? '{'.join( ' ', keys(%{$path->[$n]})).'}'
                    : $path->[$n]
                ;
                $msg .= $n == $offset ? "<$atom>" : $atom;
            }
            print "# at path ($msg)\n";
        }

        if( $offset >= @$path ) {
            push @$path, { $token => [ $token, @in ], '' => undef };
            $debug and print "#   added remaining @{[_dump($path)]}\n";
            last;
        }
        elsif( $token ne $path->[$offset] ) {
            $debug and print "#   token $token not present\n";
            splice @$path, $offset, @$path-$offset, {
                length $token
                    ? ( _node_key($token) => [$token, @in])
                    : ( '' => undef )
                ,
                $path->[$offset] => [@{$path}[$offset..$#{$path}]],
            };
            $debug and print "#   path=@{[_dump($path)]}\n";
            last;
        }
        elsif( not @in ) {
            $debug and print "#   last token to add\n";
            if( defined( $path->[$offset+1] )) {
                ++$offset;
                if( ref($path->[$offset]) eq 'HASH' ) {
                    $debug and print "#   add sentinel to node\n";
                    $path->[$offset]{''} = undef;
                }
                else {
                    $debug and print "#   convert <$path->[$offset]> to node for sentinel\n";
                    splice @$path, $offset, @$path-$offset, {
                        ''               => undef,
                        $path->[$offset] => [ @{$path}[$offset..$#{$path}] ],
                    };
                }
            }
            else {
                # already seen this pattern
                ++$self->{stats_dup};
            }
            last;
        }
        # if we get here then @_ still contains a token
        ++$offset;
    }
    $list;
}

sub _insert_node {
    my $self   = shift;
    my $path   = shift;
    my $offset = shift;
    my $token  = shift;
    my $debug  = shift;
    my $path_end = [@{$path}[$offset..$#{$path}]];
    # NB: $path->[$offset] and $[path_end->[0] are equivalent
    my $token_key = _re_path($self, [$token]);
    $debug and print "#  insert node(@{[_dump($token)]}:@{[_dump(\@_)]}) (key=$token_key)",
        " at path=@{[_dump($path_end)]}\n";
    if( ref($path_end->[0]) eq 'HASH' ) {
        if( exists($path_end->[0]{$token_key}) ) {
            if( @$path_end > 1 ) {
                my $path_key = _re_path($self, [$path_end->[0]]);
                my $new = {
                    $path_key  => [ @$path_end ],
                    $token_key => [ $token, @_ ],
                };
                $debug and print "#   +bifurcate new=@{[_dump($new)]}\n";
                splice( @$path, $offset, @$path_end, $new );
            }
            else {
                my $old_path = $path_end->[0]{$token_key};
                my $new_path = [];
                while( @$old_path and _node_eq( $old_path->[0], $token )) {
                    $debug and print "#  identical nodes in sub_path ",
                        ref($token) ? _dump($token) : $token, "\n";
                    push @$new_path, shift(@$old_path);
                    $token = shift @_;
                }
                if( @$new_path ) {
                    my $new;
                    my $token_key = $token;
                    if( @_ ) {
                        $new = {
                            _re_path($self, $old_path) => $old_path,
                            $token_key => [$token, @_],
                        };
                        $debug and print "#  insert_node(bifurc) n=@{[_dump([$new])]}\n";
                    }
                    else {
                        $debug and print "#  insert $token into old path @{[_dump($old_path)]}\n";
                        if( @$old_path ) {
                            $new = ($self->_insert_path( $old_path, $debug, [$token] ))->[0];
                        }
                        else {
                            $new = { '' => undef, $token => [$token] };
                        }
                    }
                    push @$new_path, $new;
                }
                $path_end->[0]{$token_key} = $new_path;
                $debug and print "#   +_insert_node result=@{[_dump($path_end)]}\n";
                splice( @$path, $offset, @$path_end, @$path_end );
            }
        }
        elsif( not _node_eq( $path_end->[0], $token )) {
            if( @$path_end > 1 ) {
                my $path_key = _re_path($self, [$path_end->[0]]);
                my $new = {
                    $path_key  => [ @$path_end ],
                    $token_key => [ $token, @_ ],
                };
                $debug and print "#   path->node1 at $path_key/$token_key @{[_dump($new)]}\n";
                splice( @$path, $offset, @$path_end, $new );
            }
            else {
                $debug and print "#   next in path is node, trivial insert at $token_key\n";
                $path_end->[0]{$token_key} = [$token, @_];
                splice( @$path, $offset, @$path_end, @$path_end );
            }
        }
        else {
            while( @$path_end and _node_eq( $path_end->[0], $token )) {
                $debug and print "#  identical nodes @{[_dump([$token])]}\n";
                shift @$path_end;
                $token = shift @_;
                ++$offset;
            }
            if( @$path_end ) {
                $debug and print "#   insert at $offset $token:@{[_dump(\@_)]} into @{[_dump($path_end)]}\n";
                $path_end = $self->_insert_path( $path_end, $debug, [$token, @_] );
                $debug and print "#   got off=$offset s=@{[scalar @_]} path_add=@{[_dump($path_end)]}\n";
                splice( @$path, $offset, @$path - $offset, @$path_end );
                $debug and print "#   got final=@{[_dump($path)]}\n";
            }
            else {
                $token_key = _node_key($token);
                my $new = {
                    ''         => undef,
                    $token_key => [ $token, @_ ],
                };
                $debug and print "#   convert opt @{[_dump($new)]}\n";
                push @$path, $new;
            }
        }
    }
    else {
        if( @$path_end ) {
            my $new = {
                $path_end->[0] => [ @$path_end ],
                $token_key     => [ $token, @_ ],
            };
            $debug and print "#   atom->node @{[_dump($new)]}\n";
            splice( @$path, $offset, @$path_end, $new );
            $debug and print "#   out=@{[_dump($path)]}\n";
        }
        else {
            $debug and print "#   add opt @{[_dump([$token,@_])]} via $token_key\n";
            push @$path, {
                ''         => undef,
                $token_key => [ $token, @_ ],
            };
        }
    }
    $path;
}

sub _reduce {
    my $self    = shift;
    my $context = { debug => $self->_debug(DEBUG_TAIL), depth => 0 };

    if ($self->_debug(DEBUG_TIME)) {
        $self->_init_time_func;
        my $now = $self->{_time_func}->();
        if (exists $self->{_begin_time}) {
            printf "# load=%0.6f\n", $now - $self->{_begin_time};
        }
        else {
            printf "# load-epoch=%0.6f\n", $now;
        }
        $self->{_begin_time} = $self->{_time_func}->();
    }

    my ($head, $tail) = _reduce_path( $self->_path, $context );
    $context->{debug} and print "# final head=", _dump($head), ' tail=', _dump($tail), "\n";
    if( !@$head ) {
        $self->{path} = $tail;
    }
    else {
        $self->{path} = [
            @{_unrev_path( $tail, $context )},
            @{_unrev_path( $head, $context )},
        ];
    }

    if ($self->_debug(DEBUG_TIME)) {
        my $now = $self->{_time_func}->();
        if (exists $self->{_begin_time}) {
            printf "# reduce=%0.6f\n", $now - $self->{_begin_time};
        }
        else {
            printf "# reduce-epoch=%0.6f\n", $now;
        }
        $self->{_begin_time} = $self->{_time_func}->();
    }

    $context->{debug} and print "# final path=", _dump($self->{path}), "\n";
    return $self;
}

sub _remove_optional {
    if( exists $_[0]->{''} ) {
        delete $_[0]->{''};
        return 1;
    }
    return 0;
}

sub _reduce_path {
    my ($path, $ctx) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    $debug and print "#$indent _reduce_path $ctx->{depth} ", _dump($path), "\n";
    my $new;
    my $head = [];
    my $tail = [];
    while( defined( my $p = pop @$path )) {
        if( ref($p) eq 'HASH' ) {
            my ($node_head, $node_tail) = _reduce_node($p, _descend($ctx) );
            $debug and print "#$indent| head=", _dump($node_head), " tail=", _dump($node_tail), "\n";
            push @$head, @$node_head if scalar @$node_head;
            push @$tail, ref($node_tail) eq 'HASH' ? $node_tail : @$node_tail;
        }
        else {
            if( @$head ) {
                $debug and print "#$indent| push $p leaves @{[_dump($path)]}\n";
                push @$tail, $p;
            }
            else {
                $debug and print "#$indent| unshift $p\n";
                unshift @$tail, $p;
            }
        }
    }
    $debug and print "#$indent| tail nr=@{[scalar @$tail]} t0=", ref($tail->[0]),
        (ref($tail->[0]) eq 'HASH' ? " n=" . scalar(keys %{$tail->[0]}) : '' ),
        "\n";
    if( @$tail > 1
        and ref($tail->[0]) eq 'HASH'
        and keys %{$tail->[0]} == 2
    ) {
        my $opt;
        my $fixed;
        while( my ($key, $path) = each %{$tail->[0]} ) {
            $debug and print "#$indent| scan k=$key p=@{[_dump($path)]}\n";
            next unless $path;
            if (@$path == 1 and ref($path->[0]) eq 'HASH') {
                $opt = $path->[0];
            }
            else {
                $fixed = $path;
            }
        }
        if( exists $tail->[0]{''} ) {
            my $path = [@{$tail}[1..$#{$tail}]];
            $tail = $tail->[0];
            ($head, $tail, $path) = _slide_tail( $head, $tail, $path, _descend($ctx) );
            $tail = [$tail, @$path];
        }
    }
    $debug and print "#$indent _reduce_path $ctx->{depth} out head=", _dump($head), ' tail=', _dump($tail), "\n";
    return ($head, $tail);
}

sub _reduce_node {
    my ($node, $ctx) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    my $optional = _remove_optional($node);
    $debug and print "#$indent _reduce_node $ctx->{depth} in @{[_dump($node)]} opt=$optional\n";
    if( $optional and scalar keys %$node == 1 ) {
        my $path = (values %$node)[0];
        if( not grep { ref($_) eq 'HASH' } @$path ) {
            # if we have removed an optional, and there is only one path
            # left then there is nothing left to compare. Because of the
            # optional it cannot participate in any further reductions.
            # (unless we test for equality among sub-trees).
            my $result = {
                ''         => undef,
                $path->[0] => $path
            };
            $debug and print "#$indent| fast fail @{[_dump($result)]}\n";
            return [], $result;
        }
    }

    my( $fail, $reduce ) = _scan_node( $node, _descend($ctx) );

    $debug and print "#$indent|_scan_node done opt=$optional reduce=@{[_dump($reduce)]} fail=@{[_dump($fail)]}\n";

    # We now perform tail reduction on each of the nodes in the reduce
    # hash. If we have only one key, we know we will have a successful
    # reduction (since everything that was inserted into the node based
    # on the value of the last token of each path all mapped to the same
    # value).

    if( @$fail == 0 and keys %$reduce == 1 and not $optional) {
        # every path shares a common path
        my $path = (values %$reduce)[0];
        my ($common, $tail) = _do_reduce( $path, _descend($ctx) );
        $debug and print "#$indent|_reduce_node  $ctx->{depth} common=@{[_dump($common)]} tail=", _dump($tail), "\n";
        return( $common, $tail );
    }

    # this node resulted in a list of paths, game over
    $ctx->{indent} = $indent;
    return _reduce_fail( $reduce, $fail, $optional, _descend($ctx) );
}

sub _reduce_fail {
    my( $reduce, $fail, $optional, $ctx ) = @_;
    my( $debug, $depth, $indent ) = @{$ctx}{qw(debug depth indent)};
    my %result;
    $result{''} = undef if $optional;
    my $p;
    for $p (keys %$reduce) {
        my $path = $reduce->{$p};
        if( scalar @$path == 1 ) {
            $path = $path->[0];
            $debug and print "#$indent| -simple opt=$optional unrev @{[_dump($path)]}\n";
            $path = _unrev_path($path, _descend($ctx) );
            $result{_node_key($path->[0])} = $path;
        }
        else {
            $debug and print "#$indent| _do_reduce(@{[_dump($path)]})\n";
            my ($common, $tail) = _do_reduce( $path, _descend($ctx) );
            $path = [
                (
                    ref($tail) eq 'HASH'
                        ? _unrev_node($tail, _descend($ctx) )
                        : _unrev_path($tail, _descend($ctx) )
                ),
                @{_unrev_path($common, _descend($ctx) )}
            ];
            $debug and print "#$indent| +reduced @{[_dump($path)]}\n";
            $result{_node_key($path->[0])} = $path;
        }
    }
    my $f;
    for $f( @$fail ) {
        $debug and print "#$indent| +fail @{[_dump($f)]}\n";
        $result{$f->[0]} = $f;
    }
    $debug and print "#$indent _reduce_fail $depth fail=@{[_dump(\%result)]}\n";
    return ( [], \%result );
}

sub _scan_node {
    my( $node, $ctx ) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};

    # For all the paths in the node, reverse them. If the first token
    # of the path is a scalar, push it onto an array in a hash keyed by
    # the value of the scalar.
    #
    # If it is a node, call _reduce_node on this node beforehand. If we
    # get back a common head, all of the paths in the subnode shared a
    # common tail. We then store the common part and the remaining node
    # of paths (which is where the paths diverged from the end and install
    # this into the same hash. At this point both the common and the tail
    # are in reverse order, just as simple scalar paths are.
    #
    # On the other hand, if there were no common path returned then all
    # the paths of the sub-node diverge at the end character. In this
    # case the tail cannot participate in any further reductions and will
    # appear in forward order.
    #
    # certainly the hurgliest function in the whole file :(

    # $debug = 1 if $depth >= 8;
    my @fail;
    my %reduce;

    my $n;
    for $n(
        map { substr($_, index($_, '#')+1) }
        sort
        map {
            join( '|' =>
                scalar(grep {ref($_) eq 'HASH'} @{$node->{$_}}),
                _node_offset($node->{$_}),
                scalar @{$node->{$_}},
            )
            . "#$_"
        }
    keys %$node ) {
        my( $end, @path ) = reverse @{$node->{$n}};
        if( ref($end) ne 'HASH' ) {
            $debug and print "# $indent|_scan_node push reduce ($end:@{[_dump(\@path)]})\n";
            push @{$reduce{$end}}, [ $end, @path ];
        }
        else {
            $debug and print "# $indent|_scan_node head=", _dump(\@path), ' tail=', _dump($end), "\n";
            my $new_path;
            # deal with sing, singing => s(?:ing)?ing
            if( keys %$end == 2 and exists $end->{''} ) {
                my ($key, $opt_path) = each %$end;
                ($key, $opt_path) = each %$end if $key eq '';
                $opt_path = [reverse @{$opt_path}];
                $debug and print "# $indent| check=", _dump($opt_path), "\n";
                my $end = { '' => undef, $opt_path->[0] => [@$opt_path] };
                my $head = [];
                my $path = [@path];
                ($head, my $slide, $path) = _slide_tail( $head, $end, $path, $ctx );
                if( @$head ) {
                    $new_path = [ @$head, $slide, @$path ];
                }
            }
            if( $new_path ) {
                $debug and print "# $indent|_scan_node slid=", _dump($new_path), "\n";
                push @{$reduce{$new_path->[0]}}, $new_path;
            }
            else {
                my( $common, $tail ) = _reduce_node( $end, _descend($ctx) );
                    if( not @$common ) {
                    $debug and print "# $indent| +failed $n\n";
                    push @fail, [reverse(@path), $tail];
                }
                else {
                    my $path = [@path];
                    $debug and print "# $indent|_scan_node ++recovered common=@{[_dump($common)]} tail=",
                        _dump($tail), " path=@{[_dump($path)]}\n";
                    if( ref($tail) eq 'HASH'
                        and keys %$tail == 2
                    ) {
                        if( exists $tail->{''} ) {
                            ($common, $tail, $path) = _slide_tail( $common, $tail, $path, $ctx );
                        }
                    }
                    push @{$reduce{$common->[0]}}, [
                        @$common, 
                        (ref($tail) eq 'HASH' ? $tail : @$tail ),
                        @$path
                    ];
                }
            }
        }
    }
    $debug and print
        "# $indent|_scan_node counts: reduce=@{[scalar keys %reduce]} fail=@{[scalar @fail]}\n";
    return( \@fail, \%reduce );
}

sub _do_reduce {
    my ($path, $ctx) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    my $ra = Regexp::Assemble->new(chomp=>0);
    $ra->debug($debug);
    $debug and print "# $indent| do @{[_dump($path)]}\n";
    $ra->_insertr( $_ ) for
        # When nodes come into the picture, we have to be careful
        # about how we insert the paths into the assembly.
        # Paths with nodes first, then closest node to front
        # then shortest path. Merely because if we can control
        # order in which paths containing nodes get inserted,
        # then we can make a couple of assumptions that simplify
        # the code in _insert_node.
        sort {
            scalar(grep {ref($_) eq 'HASH'} @$a)
            <=> scalar(grep {ref($_) eq 'HASH'} @$b)
                ||
            _node_offset($b) <=> _node_offset($a)
                ||
            scalar @$a <=> scalar @$b
        }
        @$path
    ;
    $path = $ra->_path;
    my $common = [];
    push @$common, shift @$path while( ref($path->[0]) ne 'HASH' );
    my $tail = scalar( @$path ) > 1 ? [@$path] : $path->[0];
    $debug and print "# $indent| _do_reduce common=@{[_dump($common)]} tail=@{[_dump($tail)]}\n";
    return ($common, $tail);
}

sub _node_offset {
    # return the offset that the first node is found, or -ve
    # optimised for speed
    my $nr = @{$_[0]};
    my $atom = -1;
    ref($_[0]->[$atom]) eq 'HASH' and return $atom while ++$atom < $nr;
    return -1;
}

sub _slide_tail {
    my $head   = shift;
    my $tail   = shift;
    my $path   = shift;
    my $ctx    = shift;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    $debug and print "# $indent| slide in h=", _dump($head),
        ' t=', _dump($tail), ' p=', _dump($path), "\n";
    my $slide_path = (each %$tail)[-1];
    $slide_path = (each %$tail)[-1] unless defined $slide_path;
    $debug and print "# $indent| slide potential ", _dump($slide_path), " over ", _dump($path), "\n";
    while( defined $path->[0] and $path->[0] eq $slide_path->[0] ) {
        $debug and print "# $indent| slide=tail=$slide_path->[0]\n";
        my $slide = shift @$path;
        shift @$slide_path;
        push @$slide_path, $slide;
        push @$head, $slide;
    }
    $debug and print "# $indent| slide path ", _dump($slide_path), "\n";
    my $slide_node = {
        '' => undef,
        _node_key($slide_path->[0]) => $slide_path,
    };
    $debug and print "# $indent| slide out h=", _dump($head),
        ' s=', _dump($slide_node), ' p=', _dump($path), "\n";
    return ($head, $slide_node, $path);
}

sub _unrev_path {
    my ($path, $ctx) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    my $new;
    if( not grep { ref($_) } @$path ) {
        $debug and print "# ${indent}_unrev path fast ", _dump($path);
        $new = [reverse @$path];
        $debug and print "#  -> ", _dump($new), "\n";
        return $new;
    }
    $debug and print "# ${indent}unrev path in ", _dump($path), "\n";
    while( defined( my $p = pop @$path )) {
        push @$new,
              ref($p) eq 'HASH'  ? _unrev_node($p, _descend($ctx) )
            : ref($p) eq 'ARRAY' ? _unrev_path($p, _descend($ctx) )
            : $p
        ;
    }
    $debug and print "# ${indent}unrev path out ", _dump($new), "\n";
    return $new;
}

sub _unrev_node {
    my ($node, $ctx ) = @_;
    my $indent = ' ' x $ctx->{depth};
    my $debug  =       $ctx->{debug};
    my $optional = _remove_optional($node);
    $debug and print "# ${indent}unrev node in ", _dump($node), " opt=$optional\n";
    my $new;
    $new->{''} = undef if $optional;
    my $n;
    for $n( keys %$node ) {
        my $path = _unrev_path($node->{$n}, _descend($ctx) );
        $new->{_node_key($path->[0])} = $path;
    }
    $debug and print "# ${indent}unrev node out ", _dump($new), "\n";
    return $new;
}

sub _node_key {
    my $node = shift;
    return _node_key($node->[0]) if ref($node) eq 'ARRAY';
    return $node unless ref($node) eq 'HASH';
    my $key = '';
    my $k;
    for $k( keys %$node ) {
        next if $k eq '';
        $key = $k if $key eq '' or $key gt $k;
    }
    return $key;
}

sub _descend {
    # Take a context object, and increase the depth by one.
    # By creating a fresh hash each time, we don't have to
    # bother adding make-work code to decrease the depth
    # when we return from what we called.
    my $ctx = shift;
    return {%$ctx, depth => $ctx->{depth}+1};
}

#####################################################################

sub _make_class {
    my $self = shift;
    my %set = map { ($_,1) } @_;
    delete $set{'\\d'} if exists $set{'\\w'};
    delete $set{'\\D'} if exists $set{'\\W'};
    return '.' if exists $set{'.'}
        or ($self->{fold_meta_pairs} and (
               (exists $set{'\\d'} and exists $set{'\\D'})
            or (exists $set{'\\s'} and exists $set{'\\S'})
            or (exists $set{'\\w'} and exists $set{'\\W'})
        ))
    ;
    for my $meta( q/\\d/, q/\\D/, q/\\s/, q/\\S/, q/\\w/, q/\\W/ ) {
        if( exists $set{$meta} ) {
            my $re = qr/$meta/;
            my @delete;
            $_ =~ /^$re$/ and push @delete, $_ for keys %set;
            delete @set{@delete} if @delete;
        }
    }
    return (keys %set)[0] if keys %set == 1;
    for my $meta( '.', '+', '*', '?', '(', ')', '^', '@', '$', '[', '/', ) {
        exists $set{"\\$meta"} and $set{$meta} = delete $set{"\\$meta"};
    }
    my $dash  = exists $set{'-'} ? do { delete($set{'-'}), '-' } : '';
    my $caret = exists $set{'^'} ? do { delete($set{'^'}), '^' } : '';
    my $class = join( '' => sort keys %set );
    $class =~ s/0123456789/\\d/ and $class eq '\\d' and return $class;
    return "[$dash$class$caret]";
}

sub _re_sort {
    return length $b <=> length $a || $a cmp $b
}

sub _combine {
    my $self = shift;
    my $type = shift;
    # print "c in = @{[_dump(\@_)]}\n";
    # my $combine = 
    return '('
    . $type
    . do {
        my( @short, @long );
        push @{ /^$Single_Char$/ ? \@short : \@long}, $_ for @_;
        if( @short == 1 ) {
            @long = sort _re_sort @long, @short;
        }
        elsif( @short > 1 ) {
            # yucky but true
            my @combine = (_make_class($self, @short), sort _re_sort @long);
            @long = @combine;
        }
        else {
            @long = sort _re_sort @long;
        }
        join( '|', @long );
    }
    . ')';
    # print "combine <$combine>\n";
    # $combine;
}

sub _combine_new {
    my $self = shift;
    my( @short, @long );
    push @{ /^$Single_Char$/ ? \@short : \@long}, $_ for @_;
    if( @short == 1 and @long == 0 ) {
        return $short[0];
    }
    elsif( @short > 1 and @short == @_ ) {
        return _make_class($self, @short);
    }
    else {
        return '(?:'
            . join( '|' =>
                @short > 1
                    ? ( _make_class($self, @short), sort _re_sort @long)
                    : ( (sort _re_sort( @long )), @short )
            )
        . ')';
    }
}

sub _re_path {
    my $self = shift;
    # in shorter assemblies, _re_path() is the second hottest
    # routine. after insert(), so make it fast.

    if ($self->{unroll_plus}) {
        # but we can't easily make this blockless
        my @arr = @{$_[0]};
        my $str = '';
        my $skip = 0;
        for my $i (0..$#arr) {
            if (ref($arr[$i]) eq 'ARRAY') {
                $str .= _re_path($self, $arr[$i]);
            }
            elsif (ref($arr[$i]) eq 'HASH') {
                $str .= exists $arr[$i]->{''}
                    ? _combine_new( $self,
                        map { _re_path( $self, $arr[$i]->{$_} ) } grep { $_ ne '' } keys %{$arr[$i]}
                    ) . '?'
                    : _combine_new($self, map { _re_path( $self, $arr[$i]->{$_} ) } keys %{$arr[$i]})
                ;
            }
            elsif ($i < $#arr and $arr[$i+1] =~ /\A$arr[$i]\*(\??)\Z/) {
                $str .= "$arr[$i]+" . (defined $1 ? $1 : '');
                ++$skip;
            }
            elsif ($skip) {
                $skip = 0;
            }
            else {
                $str .= $arr[$i];
            }
        }
        return $str;
    }

    return join( '', @_ ) unless grep { length ref $_ } @_;
    my $p;
    return join '', map {
        ref($_) eq '' ? $_
        : ref($_) eq 'HASH' ? do {
            # In the case of a node, see whether there's a '' which
            # indicates that the whole thing is optional and thus
            # requires a trailing ?
            # Unroll the two different paths to avoid the needless
            # grep when it isn't necessary.
            $p = $_;
            exists $_->{''}
            ?  _combine_new( $self,
                map { _re_path( $self, $p->{$_} ) } grep { $_ ne '' } keys %$_
            ) . '?'
            : _combine_new($self, map { _re_path( $self, $p->{$_} ) } keys %$_ )
        }
        : _re_path($self, $_) # ref($_) eq 'ARRAY'
    } @{$_[0]}
}

sub _lookahead {
    my $in = shift;
    my %head;
    my $path;
    for $path( keys %$in ) {
        next unless defined $in->{$path};
        # print "look $path: ", ref($in->{$path}[0]), ".\n";
        if( ref($in->{$path}[0]) eq 'HASH' ) {
            my $next = 0;
            while( ref($in->{$path}[$next]) eq 'HASH' and @{$in->{$path}} > $next + 1 ) {
                if( exists $in->{$path}[$next]{''} ) {
                    ++$head{$in->{$path}[$next+1]};
                }
                ++$next;
            }
            my $inner = _lookahead( $in->{$path}[0] );
            @head{ keys %$inner } = (values %$inner);
        }
        elsif( ref($in->{$path}[0]) eq 'ARRAY' ) {
            my $subpath = $in->{$path}[0]; 
            for( my $sp = 0; $sp < @$subpath; ++$sp ) {
                if( ref($subpath->[$sp]) eq 'HASH' ) {
                    my $follow = _lookahead( $subpath->[$sp] );
                    @head{ keys %$follow } = (values %$follow);
                    last unless exists $subpath->[$sp]{''};
                }
                else {
                    ++$head{$subpath->[$sp]};
                    last;
                }
            }
        }
        else {
            ++$head{ $in->{$path}[0] };
        }
    }
    # print "_lookahead ", _dump($in), '==>', _dump([keys %head]), "\n";
    return \%head;
}

sub _re_path_lookahead {
    my $self = shift;
    my $in  = shift;
    # print "_re_path_la in ", _dump($in), "\n";
    my $out = '';
    for( my $p = 0; $p < @$in; ++$p ) {
        if( ref($in->[$p]) eq '' ) {
            $out .= $in->[$p];
            next;
        }
        elsif( ref($in->[$p]) eq 'ARRAY' ) {
            $out .= _re_path_lookahead($self, $in->[$p]);
            next;
        }
        # print "$p ", _dump($in->[$p]), "\n";
        my $path = [
            map { _re_path_lookahead($self, $in->[$p]{$_} ) }
            grep { $_ ne '' }
            keys %{$in->[$p]}
        ];
        my $ahead = _lookahead($in->[$p]);
        my $more = 0;
        if( exists $in->[$p]{''} and $p + 1 < @$in ) {
            my $next = 1;
            while( $p + $next < @$in ) {
                if( ref( $in->[$p+$next] ) eq 'HASH' ) {
                    my $follow = _lookahead( $in->[$p+$next] );
                    @{$ahead}{ keys %$follow } = (values %$follow);
                }
                else {
                    ++$ahead->{$in->[$p+$next]};
                    last;
                }
                ++$next;
            }
            $more = 1;
        }
        my $nr_one = grep { /^$Single_Char$/ } @$path;
        my $nr     = @$path;
        if( $nr_one > 1 and $nr_one == $nr ) {
            $out .= _make_class($self, @$path);
            $out .= '?' if exists $in->[$p]{''};
        }
        else {
            my $zwla = keys(%$ahead) > 1
                ?  _combine($self, '?=', grep { s/\+$//; $_ } keys %$ahead )
                : '';
            my $patt = $nr > 1 ? _combine($self, '?:', @$path ) : $path->[0];
            # print "have nr=$nr n1=$nr_one n=", _dump($in->[$p]), ' a=', _dump([keys %$ahead]), " zwla=$zwla patt=$patt @{[_dump($path)]}\n";
            if( exists $in->[$p]{''} ) {
                $out .=  $more ? "$zwla(?:$patt)?" : "(?:$zwla$patt)?";
            }
            else {
                $out .= "$zwla$patt";
            }
        }
    }
    return $out;
}

sub _re_path_track {
    my $self      = shift;
    my $in        = shift;
    my $normal    = shift;
    my $augmented = shift;
    my $o;
    my $simple  = '';
    my $augment = '';
    for( my $n = 0; $n < @$in; ++$n ) {
        if( ref($in->[$n]) eq '' ) {
            $o = $in->[$n];
            $simple  .= $o;
            $augment .= $o;
            if( (
                    $n < @$in - 1
                    and ref($in->[$n+1]) eq 'HASH' and exists $in->[$n+1]{''}
                )
                or $n == @$in - 1
            ) {
                push @{$self->{mlist}}, $normal . $simple ;
                $augment .= $] < 5.009005
                    ? "(?{\$self->{m}=$self->{mcount}})"
                    : "(?{$self->{mcount}})"
                ;
                ++$self->{mcount};
            }
        }
        else {
            my $path = [
                map { $self->_re_path_track( $in->[$n]{$_}, $normal.$simple , $augmented.$augment ) }
                grep { $_ ne '' }
                keys %{$in->[$n]}
            ];
            $o = '(?:' . join( '|' => sort _re_sort @$path ) . ')';
            $o .= '?' if exists $in->[$n]{''};
            $simple  .= $o;
            $augment .= $o;
        }
    }
    return $augment;
}

sub _re_path_pretty {
    my $self = shift;
    my $in  = shift;
    my $arg = shift;
    my $pre    = ' ' x (($arg->{depth}+0) * $arg->{indent});
    my $indent = ' ' x (($arg->{depth}+1) * $arg->{indent});
    my $out = '';
    $arg->{depth}++;
    my $prev_was_paren = 0;
    for( my $p = 0; $p < @$in; ++$p ) {
        if( ref($in->[$p]) eq '' ) {
            $out .= "\n$pre" if $prev_was_paren;
            $out .= $in->[$p];
            $prev_was_paren = 0;
        }
        elsif( ref($in->[$p]) eq 'ARRAY' ) {
            $out .= _re_path($self, $in->[$p]);
        }
        else {
            my $path = [
                map { _re_path_pretty($self, $in->[$p]{$_}, $arg ) }
                grep { $_ ne '' }
                keys %{$in->[$p]}
            ];
            my $nr = @$path;
            my( @short, @long );
            push @{/^$Single_Char$/ ? \@short : \@long}, $_ for @$path;
            if( @short == $nr ) {
                $out .=  $nr == 1 ? $path->[0] : _make_class($self, @short);
                $out .= '?' if exists $in->[$p]{''};
            }
            else {
                $out .= "\n" if length $out;
                $out .= $pre if $p;
                $out .= "(?:\n$indent";
                if( @short < 2 ) {
                    my $r = 0;
                    $out .= join( "\n$indent|" => map {
                            $r++ and $_ =~ s/^\(\?:/\n$indent(?:/;
                            $_
                        }
                        sort _re_sort @$path
                    );
                }
                else {
                    $out .= join( "\n$indent|" => ( (sort _re_sort @long), _make_class($self, @short) ));
                }
                $out .= "\n$pre)";
                if( exists $in->[$p]{''} ) {
                    $out .= "\n$pre?";
                    $prev_was_paren = 0;
                }
                else {
                    $prev_was_paren = 1;
                }
            }
        }
    }
    $arg->{depth}--;
    return $out;
}

sub _node_eq {
    return 0 if not defined $_[0] or not defined $_[1];
    return 0 if ref $_[0] ne ref $_[1];
    # Now that we have determined that the reference of each
    # argument are the same, we only have to test the first
    # one, which gives us a nice micro-optimisation.
    if( ref($_[0]) eq 'HASH' ) {
        keys %{$_[0]} == keys %{$_[1]}
            and
        # does this short-circuit to avoid _re_path() cost more than it saves?
        join( '|' => sort keys %{$_[0]}) eq join( '|' => sort keys %{$_[1]})
            and
        _re_path(undef, [$_[0]] ) eq _re_path(undef, [$_[1]] );
    }
    elsif( ref($_[0]) eq 'ARRAY' ) {
        scalar @{$_[0]} == scalar @{$_[1]}
            and
        _re_path(undef, $_[0]) eq _re_path(undef, $_[1]);
    }
    else {
        $_[0] eq $_[1];
    }
}

sub _pretty_dump {
    return sprintf "\\x%02x", ord(shift);
}

sub _dump {
    my $path = shift;
    return _dump_node($path) if ref($path) eq 'HASH';
    my $dump = '[';
    my $d;
    my $nr = 0;
    for $d( @$path ) {
        $dump .= ' ' if $nr++;
        if( ref($d) eq 'HASH' ) {
            $dump .= _dump_node($d);
        }
        elsif( ref($d) eq 'ARRAY' ) {
            $dump .= _dump($d);
        }
        elsif( defined $d ) {
            # D::C indicates the second test is redundant
            # $dump .= ( $d =~ /\s/ or not length $d )
            $dump .= (
                $d =~ /\s/            ? qq{'$d'}         :
                $d =~ /^[\x00-\x1f]$/ ? _pretty_dump($d) :
                $d
            );
        }
        else {
            $dump .= '*';
        }
    }
    return $dump . ']';
}

sub _dump_node {
    my $node = shift;
    my $dump = '{';
    my $nr   = 0;
    my $n;
    for $n (sort keys %$node) {
        $dump .= ' ' if $nr++;
        # Devel::Cover shows this to test to be redundant
        # $dump .= ( $n eq '' and not defined $node->{$n} )
        $dump .= $n eq ''
            ? '*'
            : ($n =~ /^[\x00-\x1f]$/ ? _pretty_dump($n) : $n)
                . "=>" . _dump($node->{$n})
        ;
    }
    return $dump . '}';
}

'The Lusty Decadent Delights of Imperial Pompeii';
__END__
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)