The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# pX/Common/iterator_engine.pl - fglock

#



use strict;

use warnings;

#use Smart::Comments; for debugging, look also at Filtered-Comments.pm



=pod



A "rule" function gets as argument a list:



0 - a string to match

1 - an optional "continuation"

2 - an optional "flags" hashref

    'capture'=>1 means 'return whatever matches'



it returns (or "yields"):



    undef - match failed



or a hash containing:



    state - a "continuation" or undef

    bool - an "assertion" (true/false)

    match - the "match" tree or undef

    tail - the string tail or undef

    capture - the tree of captured things

    abort - the match was stopped by a { return } or a fail(),

           and it should not backtrack or whatever



Continuations are used for backtracking.



A "ruleop" function gets some arguments and returns a "rule".



=cut



# XXX - optimization - pass the string index around, 

# XXX   instead of copying the whole string to $tail every time



# XXX - weaken self-referential things



sub ruleop::alternation {

    # alternation is first match (not longest).  though we need a 

    # separate longest match for tokens (putter on #perl6)

    # update: <%var> does longest match based on the keys length() (TimToady on #perl6)



    # note: the list in @$nodes can be modified at runtime



    my $nodes = shift;

    # die "alternation list is empty" unless ref($nodes) eq 'ARRAY' && @$nodes;

    return sub {

        ### testing alternations on : @_, $nodes

        return unless @$nodes;



        my $tail =  $_[0];

        my $state = $_[1] ? [ @{$_[1]} ] : [ 0, 0 ];

        my $flags = $_[2];

        my $match;

        while ( defined $state ) {

            ### alternation string to match: "$tail - (node,state)=@$state"

            $match = 

                $nodes->[ $state->[0] ]->( $tail, $state->[1], $flags );

            ### match: $match

            if ( $match->{state} ) {

                $state->[1] = $match->{state};

            }

            else

            {

                $state->[0]++;

                $state->[1] = 0;

                ### next alternation state - (node,state):@$state

                $state = undef if $state->[0] > $#$nodes;

            }

            $match->{state} = $state;

            return $match if $match->{bool} || $match->{abort};

        }

        return;

    }

}



sub ruleop::concat {

    

    # note: the list in @nodes can NOT be modified at runtime

    # update: this is ok, because we can use <$var><$var> instead

    

    return ruleop::concat( +shift, ruleop::concat( @_ ) )

        if @_ > 2;

    my @nodes = @_;

    return sub {

        my $tail  = $_[0];

        my @state = $_[1] ? ( @{$_[1]} ) : ( 0, 0 );

        my $flags = $_[2];

        my @matches;

        while (1) {

            

            $matches[0] = $nodes[0]->( $tail, $state[0], $flags );

            ### 1st match: $matches[0]

            return $matches[0] 

                if $matches[0]{abort};

            if ( ! $matches[0]{bool} ) {

                return unless defined $matches[0]{state};

                @state = ( $matches[0]{state}, 0 );

                next;

            }

            

            $matches[1] = $nodes[1]->( $matches[0]{tail}, $state[1], $flags );

            ### 2nd match: $matches[1]

            return $matches[1] 

                if $matches[1]{abort};

            if ( ! $matches[1]{bool} ) {

                if ( ! defined( $matches[1]{state} ) ) {

                    return unless defined $matches[0]{state};

                    @state = ( $matches[0]{state}, 0 );

                }

                ### backtracking - state: @state

                ### backtracking - match: @matches

                next;

            }

            

            my $succ;

            if ( ! defined( $matches[1]{state} ) ) {

                $succ = [ $matches[0]{state}, 0 ] if defined $matches[0]{state};

            }

            else {

                $succ = [ $state[0], $matches[1]{state} ];

            }



            my $capture = [];

            ### capture: $matches[0]{capture},$matches[1]{capture}

            $capture = $matches[0]{capture} 

                if $matches[0]{capture};

            push @$capture, @{$matches[1]{capture}} 

                if $matches[1]{capture};

            undef $capture unless @$capture;



            return { 

                bool =>  1,

                match => [ @matches ], 

                tail =>  $matches[1]{tail},

                state => $succ,

                capture => $capture,

            };

        }

    }

}



sub ruleop::constant { 

    my $const = shift;

    return sub {

        ### matching constant:$_[0],$const

        return if ! $_[0] || $_[0] !~ m/^(\Q$const\E)(.*)/s;

        return { bool => 1,

                 match => { constant => $1 }, 

                 capture => [ $1 ], 

                 tail => $2,

               }

           if $_[2]{capture};  # flags->{capture}

        return { bool => 1,

                 match => { constant => $1 }, 

                 tail => $2,

               }

    }

}



sub ruleop::null {

    return sub {

        return { bool => 1,

                 match => 'null',

                 ( $_[2]->{capture} ? ( capture => [ '' ] ) : () ),

                 tail => $_[0],

               }

    }

};



sub ruleop::capture {

    # sets the 'capture' flag and return a labeled capture

    # XXX - generalize to: set_flag('capture',1)

    my $label = shift;

    my $node = shift;

    sub {

        my @param = @_;

        $param[2] = {} unless defined $param[2];

        $param[2] = { %{$param[2]}, capture=>1 };

        my $match = $node->( @param );

        return unless $match->{bool};

        return if $match->{abort};

        my $new_match = { %$match };

        $new_match->{capture} = [ { $label => $match->{capture} } ];

        return $new_match;

    }

}



=for capture

At runtime, this must return _only_ the capture set inside capture_closure:

  xx(xx(xx(

    capture_closure(..)

  )))

One way to do it is to post-process the match:

  try(

    xx(xx(xx(

      abort(

        capture_closure(..)

      )

    )))

  )

abort() sets a 'rule_finished' flag in the returned match, 

that makes it return until the start of the rule, which unsets the flag before returning.

- this can also be used to do fail() and assert(), and 'no-backtracking checkpoints'

=cut



# experimental!

sub ruleop::try { 

    my $op = shift;

    return sub {

        my $match = $op->( @_ );

        ### abortable match...

        $match->{abort} = 0;

        return $match;

    };

};



# experimental!

sub ruleop::abort { 

    my $op = shift;

    return sub {

        my $match = $op->( @_ );

        ### aborting match: $match

        $match->{abort} = 1;

        return $match;

    };

};



# experimental!

sub ruleop::negate { 

    my $op = shift;

    return sub {

        my $tail = $_[0];

        my $match = $op->( @_ );

        return if $match->{bool};

        return { bool => 1,

                 match => 'null',

                 tail => $tail,

               }

    };

};



# experimental!

=for example

    # adds an 'before' or 'after' sub call, which may print a debug message 

    ruleop::wrap( { 

            before => sub { print "matching variable: $_[0]\n" },

            after  => sub { $_[0]->{bool} ? print "matched\n" : print "no match\n" },

        },

        \&variable

    )

=cut

sub ruleop::wrap {

    my $debug = shift;

    my $node = shift;

    sub {

        $debug->{before}( @_ ) if $debug->{before};

        my $match = $node->( @_ );

        $debug->{after}( $match, @_ ) if $debug->{after};

        return $match;

    }

}



# ------- higher-order ruleops



sub ruleop::optional {

    return ruleop::alternation( [ $_[0], ruleop::null() ] );

}



sub ruleop::null_or_optional {

    return ruleop::alternation( [ ruleop::null(), $_[0] ] );

}



sub ruleop::greedy_plus { 

    my $node = shift;

    my $alt;

    $alt = ruleop::concat( 

        $node, 

        ruleop::optional( sub{ goto $alt } ),  

    );

    return $alt;

}



sub ruleop::greedy_star { 

    my $node = shift;

    return ruleop::optional( ruleop::greedy_plus( $node ) );

}



sub ruleop::non_greedy_star { 

    my $node = shift;

    ruleop::alternation( [ 

        ruleop::null(),

        ruleop::non_greedy_plus( $node ) 

    ] );

}



sub ruleop::non_greedy_plus { 

    my $node = shift;



    # XXX - needs optimization for faster backtracking, less stack usage



    return sub {

        my $tail =  $_[0];

        my $state = $_[1] || { state => undef, op => $node };

        my $flags = $_[2];



        # XXX - didn't work

        # my $match = $state->{op}->( $tail, $state->{state}, $flags ); 



        my $match = $state->{op}->( $tail, undef, $flags );

        return unless $match->{bool};

        $match->{state} = {

            state => $match->{state},

            op    => ruleop::concat( $node, $state->{op} ),

        };

        return $match;

    }

}



1;