The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Pugs::Emitter::Rule::Perl5::Ratchet;

# p6-rule perl5 emitter for ":ratchet" (non-backtracking)
# see: RuleInline.pl, RuleInline-more.pl for a program prototype

# XXX - cleanup unused nodes

use strict;
use warnings;
use Data::Dumper;
$Data::Dumper::Indent = 1;

our $direction = "+";  # XXX make lexical
our $sigspace = 0;

# XXX - reuse this sub in metasyntax()
sub call_subrule {
    my ( $subrule, $tab, @param ) = @_;
    $subrule = "\$grammar->" . $subrule unless $subrule =~ / :: | \. | -> /x;
    $subrule =~ s/\./->/;   # XXX - source filter
    return 
        "$tab     $subrule( \$s, { p => \$pos, args => {" . join(", ",@param) . "} }, \$_[3] )";
}

sub call_constant {
    my $const = $_[0];
    my $len = length( eval "'$const'" );
    $const = $_[0] eq '\\' ? '\\\\' : $_[0];  # XXX - generalize
    return
    "$_[1] ( ( substr( \$s, \$pos, $len ) eq '$const' ) 
$_[1]     ? do { \$pos $direction= $len; 1 }
$_[1]     : 0
$_[1] )";
}

sub call_perl5 {
    my $const = $_[0];
    return
    "$_[1] ( ( substr( \$s, \$pos ) =~ m/^$const/s )  
$_[1]     ? do { \$pos $direction= length \$&; 1 }
$_[1]     : 0
$_[1] )";
}

sub emit {
    my ($grammar, $ast, $param) = @_;
    # runtime parameters: $grammar, $string, $state, $arg_list
    # rule parameters: see Runtime::Rule.pm
    local $sigspace = $param->{sigspace};   # XXX - $sigspace should be lexical
    return 
        "sub {\n" . 
        "  my \$grammar = \$_[0];\n" .
        "  my \$s = \$_[1];\n" .
        "  my \$pos = \$_[3]{p};\n" .
        #"  print \"match arg_list = \$_[1]\n\";\n" .
        #"  print \"match arg_list = \@{[\%{\$_[1]} ]}\n\" if defined \$_[1];\n" .
        "  \$pos = 0 unless defined \$pos;   # TODO - .*? \$match \n" .
        #"  print \"match pos = \$pos\n\";\n" .
        "  my \%index;\n" . 
        "  my \@match;\n" .
        "  my \%named;\n" .
        #"  my \$from = \$pos;\n" .
        "  my \$bool = 1;\n" .
        "  my \$capture;\n" .
        "  my \$quantified;\n" .
        "  my \$m = bless \\{ \n" .
        "    str => \\\$s, from => \\(0+\$pos), to => \\(\$pos), \n" .
        "    bool => \\\$bool, match => \\\@match, named => \\\%named, capture => \\\$capture, \n" .
        "  }, 'Pugs::Runtime::Match::Ratchet';\n" .
        "  \$bool = 0 unless\n" .
        emit_rule( $ast, ' ' ) . ";\n" .
        "  return \$m;\n" .
        "}\n";
}

sub emit_rule {
    my $n = $_[0];
    my $tab = $_[1] . '  ';
    die "unknown node: ", Dumper( $n )
        unless ref( $n ) eq 'HASH';
    #print "NODE ", Dumper($n);
    my ( $k, $v ) = each %$n;
    # XXX - use real references
    no strict 'refs';
    my $code = &$k( $v, $tab );
    return $code;
}

#rule nodes

sub non_capturing_group {
    return emit_rule( $_[0], $_[1] );
}        
sub quant {
    my $term = $_[0]->{'term'};
    my $quantifier = $_[0]->{quant};
    #print "QUANT: ",Dumper($_[0]);
    $quantifier = '' unless defined $quantifier;
    # TODO: fix grammar to not emit empty quantifier
    my $tab = ( $quantifier eq '' ) ? $_[1] : $_[1] . "  ";
    my $ws = metasyntax( '?ws', $tab );
    my $ws3 = ( $sigspace && $_[0]->{ws3} ne '' ) ? " &&\n$ws" : '';
    my $rul = emit_rule( $term, $tab );
    $rul = "$ws &&\n$rul" if $sigspace && $_[0]->{ws1} ne '';
    $rul = "$rul &&\n$ws" if $sigspace && $_[0]->{ws2} ne '';
    #print $rul;
    return $rul 
        if $quantifier eq '';
    # *  +  ?
    # TODO: *? +? ??
    # TODO: *+ ++ ?+
    # TODO: quantifier + capture creates Array
    return 
        "$_[1] do { my \$quantified = 1; (\n$rul\n" .
        "$_[1] ||\n" .
        "$_[1]   1\n" .
        "$_[1] ) }$ws3"
        if $quantifier eq '?';
    return 
        "$_[1] do { my \$quantified = 1; while (\n$rul) {}; 1 }$ws3"
        if $quantifier eq '*';
    return
        "$_[1] do { my \$quantified = 1;\n" . 
        "$_[1] (\n$rul\n" .
        "$_[1] &&\n" .
        "$_[1]   do { while (\n$rul) {}; 1 }\n" .
        "$_[1] ) }$ws3"
        if $quantifier eq '+';
    die "quantifier not implemented: $quantifier";
}        
sub alt {
    my @s;
    for ( @{$_[0]} ) { 
        my $tmp = emit_rule( $_, $_[1].'  ' );
        push @s, $tmp if $tmp;   
    }
    return 
        "$_[1] do {
$_[1]   my \$pos1 = \$pos;
$_[1]   do {
" . join( "\n$_[1]   } || do { \$pos = \$pos1;\n", @s ) . "
$_[1]   }
$_[1] }";
}        
sub concat {
    my @s;
    for ( @{$_[0]} ) { 
        my $tmp = emit_rule( $_, $_[1] );
        push @s, $tmp if $tmp;   
    }
    @s = reverse @s if $direction eq '-';
    return "$_[1] (\n" . join( "\n$_[1] &&\n", @s ) . "\n$_[1] )";
}        
sub code {
    return "$_[1] $_[0]\n";  
}        
sub dot {
    if ( $direction eq '+' ) {
        "$_[1] do { \$pos < length( \$s ) ? ++\$pos : 0 }"
    }
    else {
        "$_[1] do { \$pos >= 0 ? do{ --\$pos; 1 } : 0 }"
    }
}
sub variable {
    my $name = "$_[0]";
    my $value = undef;
    # XXX - eval $name doesn't look up in user lexical pad
    # XXX - what &xxx interpolate to?
    
    if ( $name =~ /^\$/ ) {
        # $^a, $^b
        if ( $name =~ /^ \$ \^ ([^\s]*) /x ) {
            my $index = ord($1)-ord('a');
            #print "Variable #$index\n";
            #return "$_[1] constant( \$_[7][$index] )\n";
            
            my $code = 
            "    ... sub { 
                #print \"Runtime Variable args[\", join(\",\",\@_) ,\"] \$_[7][$index]\\n\";
                return constant( \$_[7][$index] )->(\@_);
            }";
            $code =~ s/^/$_[1]/mg;
            return "$code\n";
        }
        else {
            $value = eval $name;
        }
    }
    
    $value = join('', eval $name) if $name =~ /^\@/;
    if ( $name =~ /^%/ ) {
        # XXX - runtime or compile-time interpolation?
        return "$_[1] ... hash( \\$name )\n" if $name =~ /::/;
        return "$_[1] ... hash( get_variable( '$name' ) )\n";
    }
    die "interpolation of $name not implemented"
        unless defined $value;

    return call_constant( $value, $_[1] );
}
sub special_char {
    my $char = substr($_[0],1);
    for ( qw( r n t e f w d s ) ) {
        return call_perl5(   "\\$_",  $_[1] ) if $char eq $_;
        return call_perl5( "[^\\$_]", $_[1] ) if $char eq uc($_);
    }
    $char = '\\\\' if $char eq '\\';
    return call_constant( $char, $_[1] );
}
sub match_variable {
    my $name = $_[0];
    my $num = substr($name,1);
    #print "var name: ", $num, "\n";
    my $code = 
    "    ... sub { 
        my \$m = Pugs::Runtime::Match->new( \$_[2] );
        return constant( \"\$m->[$num]\" )->(\@_);
    }";
    $code =~ s/^/$_[1]/mg;
    return "$code\n";
}
sub closure {
    my $code = $_[0]; 
    
    # XXX XXX XXX - source-filter - temporary hacks to translate p6 to p5
    # $()<name>
    $code =~ s/ ([^']) \$ \( \) < (.*?) > /$1 \$_[0]->[$2] /sgx;
    # $<name>
    $code =~ s/ ([^']) \$ < (.*?) > /$1 \$_[0]->{$2} /sgx;
    # $()
    $code =~ s/ ([^']) \$ \( \) /$1 \$_[0]->() /sgx;
    # $/
    $code =~ s/ ([^']) \$ \/ /$1 \$_[0] /sgx;
    #print "Code: $code\n";
    
    return 
        "$_[1] ( sub $code->( \$m ) || 1 )" 
        unless $code =~ /return/;
        
    return
        "$_[1] ( ( \$capture = sub $code->( \$m ) ) 
$_[1]   && return \$m )";
}
sub capturing_group {
    my $program = $_[0];

    $program = emit_rule( $program, $_[1].'      ' )
        if ref( $program );
    my $rnd = rand;
    return "$_[1] do{ 
$_[1]     my \$hash = do {
$_[1]       my \$bool = 1;
$_[1]       my \$from = \$pos;
$_[1]       my \@match;
$_[1]       my \%named;
$_[1]       my \$capture;
$_[1]       my \$quantified;
$_[1]       \$bool = 0 unless
" .             $program . ";
$_[1]       { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \$bool, to => \\(0+\$pos), capture => \\\$capture }
$_[1]     };
$_[1]     my \$bool = \$hash->{'bool'};
$_[1]     \$index{$rnd} = \$#match+1 unless defined \$index{$rnd};
$_[1]     if ( \$quantified ) {
$_[1]       if ( \$bool ) {
$_[1]         push \@{ \$match[\$index{$rnd}] }, bless \\\$hash, 'Pugs::Runtime::Match::Ratchet';
$_[1]       }
$_[1]       else {
$_[1]         \@{ \$match[\$index{$rnd}] } = () 
$_[1]           if ! defined \$match[\$index{$rnd}];
$_[1]       }
$_[1]     }
$_[1]     else {
$_[1]       if ( ! defined \$match[\$index{$rnd}] ) {
$_[1]         \$match[\$index{$rnd}] = bless \\\$hash, 'Pugs::Runtime::Match::Ratchet';
$_[1]       }
$_[1]       elsif ( ref( \$match[\$index{$rnd}] ) ne 'ARRAY' ) {
$_[1]         \$match[\$index{$rnd}] = [ \$match[\$index{$rnd}], bless \\\$hash, 'Pugs::Runtime::Match::Ratchet' ];
$_[1]       }
$_[1]       else {
$_[1]         push \@{ \$match[\$index{$rnd}] }, bless \\\$hash, 'Pugs::Runtime::Match::Ratchet';
$_[1]       }
$_[1]       #unshift \@{ \$match[\$index{$rnd}] } unless \$bool;
$_[1]     }
$_[1]     \$bool;
$_[1] }";
}        
sub named_capture {
    my $name    = $_[0]{ident};
    my $program = $_[0]{rule};
    my $flat    = $_[0]{flat};
    $program = emit_rule( $program, $_[1].'        ' )
        if ref( $program );
    # TODO - repeated captures create an Array

    my($try_match, $gen_match, $post_match);
    if ( $flat ) {
        $try_match = <<"."
$_[1]     my \$bool = 1;
$_[1]     \$bool = 0 unless
.
.            $program . ";\n";
        $gen_match = "\$match[-1]";
	$post_match = "\$#match--;";
    } else {
        $try_match = <<"." ;
$_[1]     my \$hash = do {
$_[1]       my \$bool = 1;
$_[1]       my \$from = \$pos;
$_[1]       my \@match;
$_[1]       my \%named;
$_[1]       my \$capture;
$_[1]       \$bool = 0 unless
$program;
$_[1]       { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => \\\$capture }
$_[1]     };
$_[1]     my \$bool = \${\$hash->{'bool'}};
.
        $gen_match = "bless \\\$hash, 'Pugs::Runtime::Match::Ratchet'";
	$post_match = "";
    }

    return "$_[1] do{ 
$try_match
$_[1]     if ( \$bool ) {
$_[1]       my \$match = $gen_match;
$_[1]       if ( \$quantified ) {
$_[1]         \$named{'$name'} = [] if ! defined \$named{'$name'};
$_[1]         push \@{\$named{'$name'}}, \$match;
$_[1]       } else {
$_[1]         if ( ! defined \$named{'$name'} ){
$_[1]           \$named{'$name'} = \$match;
$_[1]         } elsif ( ref ( \$named{'$name'} ) ne 'ARRAY' ){
$_[1]           \$named{'$name'} = [\$named{'$name'}, \$match];
$_[1]         } else {
$_[1]           push \@{ \$named{'$name'} }, \$match;
$_[1]         }
$_[1]       }
$_[1]     }
$_[1]     $post_match
$_[1]     \$bool;
$_[1] }";
}
sub before {
    my $program = $_[0]{rule};
    $program = emit_rule( $program, $_[1].'        ' )
        if ref( $program );
    return "$_[1] do{ 
$_[1]     my \$pos1 = \$pos;
$_[1]     do {
$_[1]       my \$pos = \$pos1;
$_[1]       my \$from = \$pos;
$_[1]       my \@match;
$_[1]       my \%named;
$_[1]       my \$capture;
$_[1]       \$bool = 0 unless
" .             $program . ";
$_[1]       \$bool;
$_[1]     };
$_[1] }";
}
sub after {
    local $direction = "-";
    my $program = $_[0]{rule};
    $program = emit_rule( $program, $_[1].'        ' )
        if ref( $program );
    return "$_[1] do{ 
$_[1]     my \$pos1 = \$pos;
$_[1]     do {
$_[1]       my \$pos = \$pos1 - 1;
$_[1]       my \$from = \$pos;
$_[1]       my \@match;
$_[1]       my \%named;
$_[1]       my \$capture;
$_[1]       \$bool = 0 unless
" .             $program . ";
$_[1]       \$bool;
$_[1]     };
$_[1] }";
}
sub colon {
    my $str = $_[0];
    return "$_[1] # : no-op\n"
        if $str eq ':';
    return "$_[1] ( \$pos >= length( \$s ) ) \n" 
        if $str eq '$';
    return "$_[1] ( \$pos == 0 ) \n" 
        if $str eq '^';
    die "'$str' not implemented";
}
sub constant {
    call_constant( @_ );
}

use vars qw( %char_class );
BEGIN {
    %char_class = map { $_ => 1 } qw( 
alpha
alnum
ascii
blank
cntrl
digit
graph
lower
print
punct
space
upper
word
xdigit
);
}

sub metasyntax {
    # <cmd>
    my $cmd = $_[0];   
    my $prefix = substr( $cmd, 0, 1 );
    if ( $prefix eq '@' ) {
        # XXX - wrap @array items - see end of Pugs::Grammar::Rule
        return 
            "$_[1] ... alternation( \\$cmd )\n";
    }
    if ( $prefix eq '$' ) {
        if ( $cmd =~ /::/ ) {
            # call method in fully qualified $package::var
            # ...->match( $rule, $str, $grammar, $flags, $state )  
            # TODO - send $pos to subrule
            return 
                "$_[1]         do {\n" .
                "$_[1]           push \@match,\n" . 
                "$_[1]             $cmd->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
                "$_[1]           \$pos = \$match[-1]->to;\n" .
                "$_[1]           !\$match[-1] != 1;\n" .
                "$_[1]         }"
        }
        # call method in lexical $var
        # TODO - send $pos to subrule
        return 
                "$_[1]         do {\n" .
                "$_[1]           my \$r = Pugs::Runtime::Rule::get_variable( '$cmd' );\n" . 
                "$_[1]           push \@match,\n" . 
                "$_[1]             \$r->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
                "$_[1]           \$pos = \$match[-1]->to;\n" .
                "$_[1]           !\$match[-1] != 1;\n" .
                "$_[1]         }"
    }
    if ( $prefix eq q(') ) {   # single quoted literal ' 
        $cmd = substr( $cmd, 1, -1 );
        return call_constant( $cmd, $_[1] );
    }
    if ( $prefix eq q(") ) {   # interpolated literal "
        $cmd = substr( $cmd, 1, -1 );
        warn "<\"...\"> not implemented";
        return;
    }
    if ( $prefix =~ /[-+[]/ ) {   # character class 
	   if ( $prefix eq '-' ) {
	       $cmd = '[^' . substr($cmd, 2);
	   } 
       elsif ( $prefix eq '+' ) {
	       $cmd = substr($cmd, 2);
	   }
	   # XXX <[^a]> means [\^a] instead of [^a] in perl5re

	   return call_perl5($cmd, $_[1]);
    }
    if ( $prefix eq '?' ) {   # non_capturing_subrule / code assertion
        $cmd = substr( $cmd, 1 );
        if ( $cmd =~ /^{/ ) {
            warn "code assertion not implemented";
            return;
        }
        return
	    "$_[1] do { my \$match =\n" .
	    call_subrule( $cmd, $_[1] . "          " ) . ";\n" .
	    "$_[1]      my \$bool = (!\$match != 1);\n" .
	    "$_[1]      \$pos = \$match->to if \$bool;\n" .
	    "$_[1]      \$bool;\n" .
	    "$_[1] }";
    }
    if ( $prefix eq '!' ) {   # negated_subrule / code assertion 
        $cmd = substr( $cmd, 1 );
        if ( $cmd =~ /^{/ ) {
            warn "code assertion not implemented";
            return;
        }
        return 
            "$_[1] ... negate( '$_[0]', \n" .
            call_subrule( $_[0], $_[1]."  " ) .
            "$_[1] )\n";
    }
    if ( $cmd eq '.' ) {
            warn "<$cmd> not implemented";
            return;
    }
    if ( $prefix =~ /[_[:alnum:]]/ ) {  
        # "before" and "after" are handled in a separate rule
        if ( $cmd eq 'cut' ) {
            warn "<$cmd> not implemented";
            return;
        }
        if ( $cmd eq 'commit' ) {
            warn "<$cmd> not implemented";
            return;
        }
        if ( $cmd eq 'prior' ) {
            warn "<$cmd> not implemented";
            return;
        }
        if ( $cmd eq 'null' ) {
            warn "<$cmd> not implemented";
            return;
        }
        if ( exists $char_class{$cmd} ) {
            # XXX - inlined char classes are not inheritable, but this should be ok
            return
                "$_[1] ( ( substr( \$s, \$pos, 1 ) =~ /[[:$cmd:]]/ ) 
$_[1]     ? do { $direction$direction\$pos; 1 }
$_[1]     : 0
$_[1] )";
        }
        # capturing subrule
        # <subrule ( param, param ) >
        my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd );
        $param_list = '' unless defined $param_list;
        my @param = split( ',', $param_list );
        # TODO - send $pos to subrule
        return named_capture(
            { ident => $subrule, 
              rule => 
                "$_[1]         do {\n" . 
                "$_[1]           push \@match,\n" . 
                    call_subrule( $subrule, $_[1]."        ", @param ) . ";\n" .
                "$_[1]           my \$bool = (!\$match[-1] != 1);\n" .
                "$_[1]           \$pos = \$match[-1]->to if \$bool;\n" .
                #"print !\$match[-1], ' ', Dumper \$match[-1];\n" .
                "$_[1]           \$bool;\n" .
                "$_[1]         }",
	      flat => 1
            }, 
            $_[1],    
        );
    }
    die "<$cmd> not implemented";
}

1;