The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#line 1
##
# name:      Pegex::Parser
# abstract:  Pegex Parser Runtime
# author:    Ingy döt Net <ingy@cpan.org>
# license:   perl
# copyright: 2011
# see:
# - Pegex::Grammar

package Pegex::Parser;
use Pegex::Mo;

use Scalar::Util;
use Pegex::Input;

# Grammar object or class
has 'grammar';
# Receiver object or class
has 'receiver' => (
    default => sub {
        require Pegex::Receiver;
        Pegex::Receiver->new();
    },
);

#
# Parser options
#

# Allow errors to not be thrown
has 'throw_on_error' => ( default => sub {1} );

# Wrap results in hash with rule name for key
has 'wrap' => ( default => sub { $_[0]->receiver->wrap } );

# # Allow a partial parse
# has 'partial' => default => sub {0};

# Internal properties.
has 'input';                # Input object to read from
has 'buffer';               # Input buffer to parse
has 'error';                # Error message goes here
has 'position' => (         # Current position in buffer
    default => sub {0},
);
has 'farthest' => (         # Farthest point matched in buffer
    default => sub {0},
);

# Debug the parsing of input.
has 'debug' => builder => 'debug_';

sub debug_ {
    exists($ENV{PERL_PEGEX_DEBUG}) ? $ENV{PERL_PEGEX_DEBUG} :
    defined($Pegex::Parser::Debug) ? $Pegex::Parser::Debug :
    0;
}

sub parse {
    my $self = shift;
    $self = $self->new unless ref $self;

    die "Usage: " . ref($self) . '->parse($input [, $start_rule]'
        unless 1 <= @_ and @_ <= 2;

    my $input = (ref $_[0] and UNIVERSAL::isa($_[0], 'Pegex::Input'))
        ? shift
        : Pegex::Input->new(shift)->open;
    $self->input($input);

    $self->buffer($self->input->read);

    my $grammar = $self->grammar or die "No 'grammar'. Can't parse";
    if (not ref $grammar) {
        eval "require $grammar";
        $self->grammar($grammar->new);
    }

    my $start_rule = shift ||
        $self->grammar->tree->{'+top'} ||
        ($self->grammar->tree->{'TOP'} ? 'TOP' : undef)
            or die "No starting rule for Pegex::Parser::parse";

    my $receiver = $self->receiver or die "No 'receiver'. Can't parse";
    if (not ref $receiver) {
        eval "require $receiver";
        $self->receiver($receiver->new);
    }
    # Add circular ref and weaken it.
    $self->receiver->parser($self);
    Scalar::Util::weaken($self->receiver->{parser});

    # Do the parse
    my $match = $self->match($start_rule) or return;

    # Parse was successful!
    $self->input->close;
    return ($self->receiver->data || $match);
}

sub match {
    my ($self, $rule) = @_;

    $self->receiver->initialize($rule)
        if $self->receiver->can("initialize");

    my $match = $self->match_next({'.ref' => $rule});
    if (not $match or $self->position < length($self->buffer)) {
        $self->throw_error("Parse document failed for some reason");
        return;  # In case $self->throw_on_error is off
    }
    $match = $match->[0];

    $match = $self->receiver->finalize($match, $rule)
        if $self->receiver->can("finalize");

    $match = {$rule => []} unless $match;

    $match = $match->{TOP} || $match if $rule eq 'TOP';

    return $match;
}

sub get_min_max {
    my ($self, $next) = @_;
    defined($next->{'+min'})
    ? defined($next->{'+max'})
        ? (@{$next}{qw'+min +max'})
        : ($next->{'+min'}, 0)
    : defined($next->{'+max'})
        ? (0, $next->{'+max'})
        : (1, 1);
}

sub match_next {
    my ($self, $next) = @_;

    return $self->match_next_with_sep($next)
        if $next->{'.sep'};

    my ($min, $max) = $self->get_min_max($next);
    my $assertion = $next->{'+asr'} || 0;
    my ($rule, $kind) = map {($next->{".$_"}, $_)}
        grep {$next->{".$_"}} qw(ref rgx all any err code) or XXX $next;

    my ($match, $position, $count, $method) =
        ([], $self->position, 0, "match_$kind");

    while (my $return = $self->$method($rule, $next)) {
        $position = $self->position unless $assertion;
        $count++;
        push @$match, @$return;
        last if $max == 1;
    }
    if ($max != 1) {
        $match = [$match];
        $self->set_position($position);
    }
    my $result = (($count >= $min and (not $max or $count <= $max)) ? 1 : 0)
        ^ ($assertion == -1);
    $self->set_position($position)
        if not($result) or $assertion;

    $match = [] if $next->{'-skip'};
    return ($result ? $match : 0);
}

sub match_next_with_sep {
    my ($self, $next) = @_;

    my ($min, $max) = $self->get_min_max($next);
    my ($rule, $kind) = map {($next->{".$_"}, $_)}
        grep {$next->{".$_"}} qw(ref rgx all any err) or XXX $next;
    my $separator = $next->{'.sep'};

    my ($match, $position, $count, $method, $scount, $smin, $smax) =
        ([], $self->position, 0, "match_$kind", 0,
            $self->get_min_max($separator));
    while (my $return = $self->$method($rule, $next)) {
        $position = $self->position;
        $count++;
        push @$match, @$return;
        $return = $self->match_next($separator) or last;
        my @return = @$return;
        if (@return) {
            @return = @{$return[0]} if $smax != 1;
            push @$match, @return;
        }
        $scount++;
    }
    if ($max != 1) {
        $match = [$match];
    }
    my $result = (($count >= $min and (not $max or $count <= $max)) ? 1 : 0);
    $self->set_position($position)
        if $count == $scount and not $separator->{'+eok'};

    $match = [] if $next->{'-skip'};
    return ($result ? $match : 0);
}

sub match_ref {
    my ($self, $ref, $parent) = @_;
    my $rule = $self->grammar->tree->{$ref};
    $rule ||= $self->can("match_rule_$ref")
            ? { '.code' => $ref } 
            : die "\n\n*** No grammar support for '$ref'\n\n";

    my $trace = (not $rule->{'+asr'} and $self->debug);
    $self->trace("try_$ref") if $trace;

    my $match = (ref($rule) eq 'CODE')
        ? $self->$rule()
        : $self->match_next($rule);
    if ($match) {
        $self->trace("got_$ref") if $trace;
        if (not $rule->{'+asr'} and not $parent->{'-skip'}) {
            my $callback = "got_$ref";
            if (my $sub = $self->receiver->can($callback)) {
                $match = [ $sub->($self->receiver, $match->[0]) ];
            }
            elsif ($self->wrap ? not($parent->{'-pass'}) : $parent->{'-wrap'}) {
                $match = [ @$match ? { $ref => $match->[0] } : () ];
            }
        }
    }
    else {
        $self->trace("not_$ref") if $trace;
        $match = 0;
    }

    return $match;
}

my $terminater = 0;
sub match_rgx {
    my ($self, $regexp, $parent) = @_;

    my $start = pos($self->{buffer}) = $self->position;
    die "Your grammar seems to not terminate at end of stream"
        if $start >= length $self->{buffer} and $terminater++ > 1000;
    $self->{buffer} =~ /$regexp/g or return 0;
    my $finish = pos($self->{buffer});
    no strict 'refs';
    my $match = [ map $$_, 1..$#+ ];
    $match = [ $match ] if $#+ > 1;

    $self->set_position($finish);

    return $match;
}

sub match_all {
    my ($self, $list, $parent) = @_;
    my $pos = $self->position;
    my $set = [];
    my $len = 0;
    for my $elem (@$list) {
        if (my $match = $self->match_next($elem)) {
            next if $elem->{'+asr'} or $elem->{'-skip'};
            push @$set, @$match;
            $len++;
        }
        else {
            $self->set_position($pos);
            return 0;
        }
    }
    $set = [ $set ] if $len > 1;
    return $set;
}

sub match_any {
    my ($self, $list, $parent) = @_;
    for my $elem (@$list) {
        if (my $match = $self->match_next($elem)) {
            return $match;
        }
    }
    return 0;
}

sub match_err {
    my ($self, $error) = @_;
    $self->throw_error($error);
}

sub match_code {
    my ($self, $code) = @_;
    my $method = "match_rule_$code";
    return $self->$method();
}

sub set_position {
    my ($self, $position) = @_;
    $self->position($position);
    $self->farthest($position) if $position > $self->farthest;
}

sub trace {
    my ($self, $action) = @_;
    my $indent = ($action =~ /^try_/) ? 1 : 0;
    $self->{indent} ||= 0;
    $self->{indent}-- unless $indent;
    print STDERR ' ' x $self->{indent};
    $self->{indent}++ if $indent;
    my $snippet = substr($self->buffer, $self->position);
    $snippet = substr($snippet, 0, 30) . "..." if length $snippet > 30;
    $snippet =~ s/\n/\\n/g;
    print STDERR sprintf("%-30s", $action) . ($indent ? " >$snippet<\n" : "\n");
}

sub throw_error {
    my ($self, $msg) = @_;
    $self->format_error($msg);
    return 0 unless $self->throw_on_error;
    require Carp;
    Carp::croak($self->error);
}

sub format_error {
    my ($self, $msg) = @_;
    my $position = $self->farthest;
    my $line = @{[substr($self->buffer, 0, $position) =~ /(\n)/g]} + 1;
    my $column = $position - rindex($self->buffer, "\n", $position);
    my $context = substr($self->buffer, $position, 50);
    $context =~ s/\n/\\n/g;
    $self->error(<<"...");
Error parsing Pegex document:
  msg: $msg
  line: $line
  column: $column
  context: "$context"
  position: $position
...
    $@ = $self->error;
}

1;