The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl
# Marpa::R3 is Copyright (C) 2017, Jeffrey Kegler.
#
# This module is free software; you can redistribute it and/or modify it
# under the same terms as Perl 5.10.1. For more details, see the full text
# of the licenses in the directory LICENSES.
#
# This program is distributed in the hope that it will be
# useful, but it is provided "as is" and without any express
# or implied warranties. For details, see the full text of
# of the licenses in the directory LICENSES.

# Test of scannerless parsing -- diagnostics

use 5.010001;

use strict;
use warnings;

use Test::More tests => 9;
use English qw( -no_match_vars );
use POSIX qw(setlocale LC_ALL);

POSIX::setlocale(LC_ALL, "C");

use lib 'inc';
use Marpa::R3::Test;
use Marpa::R3;

my $dsl = <<'END_OF_RULES';
:default ::= action => My_Actions::do_arg0
:start ::= Script
Script ::= Calculation* action => do_list
Calculation ::= Expression | ('say') Expression
Expression ::=
     Number
   | ('+') Expression Expression action => do_add
Number ~ [\d] +
:discard ~ whitespace
whitespace ~ [\s]+
# allow comments
:discard ~ <hash comment>
<hash comment> ~ <terminated hash comment> | <unterminated
   final hash comment>
<terminated hash comment> ~ '#' <hash comment body> <vertical space char>
<unterminated final hash comment> ~ '#' <hash comment body>
<hash comment body> ~ <hash comment char>*
<vertical space char> ~ [\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
<hash comment char> ~ [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
END_OF_RULES

my $grammar = Marpa::R3::Scanless::G->new(
    { semantics_package => 'My_Actions', source => \$dsl, } );

package My_Actions;
# The SELF object is a very awkward way of specifying the per-parse
# argument directly, one which was necessary before the $recce->value()
# method took an argument.
# This way of doing things is discourage and preserved here for testing purposes.
our $SELF;
sub new { return $SELF }

sub do_list {
    my ( $self, $v ) = @_;
    my @results = @{$v};
    return +( scalar @results ) . ' results: ' . join q{ }, @results;
}

sub do_add  { return $_[1]->[0] + $_[1]->[1] }
sub do_arg0 { return $_[1]->[0] }

sub show_last_expression {
    my ($self) = @_;
    my $recce = $self->{recce};
    my ( $start, $length ) = $recce->last_completed('Expression');
    return if not defined $start;
    my $last_expression = $recce->g1_literal( $start, $length );
    return $last_expression;
} ## end sub show_last_expression

package main;

sub my_parser {
    my ( $grammar, $string ) = @_;

    my $self = bless { grammar => $grammar }, 'My_Actions';
    local $My_Actions::SELF = $self;

    my $trace_output = q{};
    open my $trace_fh, q{>}, \$trace_output;
    my $recce = Marpa::R3::Scanless::R->new(
        {   grammar               => $grammar,
            trace_terminals       => 3,
            trace_file_handle     => $trace_fh,
            too_many_earley_items => 100,         # test this
        }
    );
    $self->{recce} = $recce;
    my ( $parse_value, $parse_status, $last_expression );

    my $eval_ok = eval { $recce->read( \$string ); 1 };
    close $trace_fh;

    if ( not defined $eval_ok ) {
        my $abbreviated_error = $EVAL_ERROR;
        chomp $abbreviated_error;
        $abbreviated_error =~ s/\n.*//xms;
        die $self->show_last_expression(), $EVAL_ERROR;
    } ## end if ( not defined $eval_ok )
    my $value_ref = $recce->value;
    if ( not defined $value_ref ) {
        die join q{ },
            'Input read to end but no parse',
            $self->show_last_expression();
    }
    return $recce, ${$value_ref}, $trace_output;
} ## end sub my_parser

my @tests_data = (
    [ '+++ 1 2 3 + + 1 2 4', '1 results: 13', 'Parse OK', 'entire input' ],
);

TEST:
for my $test_data (@tests_data) {
    my ($test_string,     $expected_value,
        $expected_result, $expected_last_expression
    ) = @{$test_data};
    my ( $recce, $actual_value, $trace_output ) =
        my_parser( $grammar, $test_string );

# Marpa::R3::Display
# name: Scanless terminals_expected() synopsis

    my @terminals_expected = @{$recce->terminals_expected()};

# Marpa::R3::Display::End

    Marpa::R3::Test::is(
        ( join q{ }, sort @terminals_expected ),
        'Number [Lex-0] [Lex-1]',
        qq{SLIF terminals_expected()}
    );

# Marpa::R3::Display
# name: Scanless progress_show() synopsis

    my $progress_show_output = $recce->progress_show();

# Marpa::R3::Display::End

    Marpa::R3::Test::is( $progress_show_output,
        <<'END_OF_EXPECTED_OUTPUT', qq{Scanless progress_show()} );
=== Earley set 11 at B1L1c20 ===
P2 B1L1c20 Calculation ::= . Expression
P3 B1L1c20 Calculation ::= . 'say' Expression
P4 B1L1c20 Expression ::= . Number
P5 B1L1c20 Expression ::= . '+' Expression Expression
F1 B1L1c1 [:start:] ::= Script .
F2 B1L1c1 Calculation ::= Expression .
F4 B1L1c19 Expression ::= Number .
F5 x2 B1L1c1-11 Expression ::= '+' Expression Expression .
F6 B1L1c1 Script ::= Calculation . *
END_OF_EXPECTED_OUTPUT

    Marpa::R3::Test::is( $actual_value, $expected_value,
        qq{Value of "$test_string"} );
    Marpa::R3::Test::is( $trace_output,
        <<'END_OF_OUTPUT', qq{Trace output for "$test_string"} );
Setting trace_terminals option
Expecting "Number" at earleme 0
Expecting "[Lex-0]" at earleme 0
Expecting "[Lex-1]" at earleme 0
Registering character U+002b "+" as symbol 6: [\+]
Registering character U+002b "+" as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Registering character U+0020 " " as symbol 8: [\s]
Registering character U+0020 " " as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Registering character U+0031 "1" as symbol 7: [\d]
Registering character U+0031 "1" as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Registering character U+0032 "2" as symbol 7: [\d]
Registering character U+0032 "2" as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Registering character U+0033 "3" as symbol 7: [\d]
Registering character U+0033 "3" as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Registering character U+0034 "4" as symbol 7: [\d]
Registering character U+0034 "4" as symbol 10: [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]
Restarted recognizer at B1L1c1
Expected lexeme Number at B1L1c1; assertion ID = 0
Expected lexeme 'say' at B1L1c1; assertion ID = 1
Expected lexeme '+' at B1L1c1; assertion ID = 2
Reading codepoint "+" 0x002b at B1L1c1
Codepoint "+" 0x002b accepted as [\+] at B1L1c1
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c1
Attempting to read lexeme B1L1c1 e1: '+'; value="+"
Accepted lexeme B1L1c1 e1: '+'; value="+"
Restarted recognizer at B1L1c2
Expected lexeme Number at B1L1c2; assertion ID = 0
Expected lexeme '+' at B1L1c2; assertion ID = 2
Reading codepoint "+" 0x002b at B1L1c2
Codepoint "+" 0x002b accepted as [\+] at B1L1c2
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c2
Attempting to read lexeme B1L1c2 e2: '+'; value="+"
Accepted lexeme B1L1c2 e2: '+'; value="+"
Restarted recognizer at B1L1c3
Expected lexeme Number at B1L1c3; assertion ID = 0
Expected lexeme '+' at B1L1c3; assertion ID = 2
Reading codepoint "+" 0x002b at B1L1c3
Codepoint "+" 0x002b accepted as [\+] at B1L1c3
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c3
Attempting to read lexeme B1L1c3 e3: '+'; value="+"
Accepted lexeme B1L1c3 e3: '+'; value="+"
Restarted recognizer at B1L1c4
Expected lexeme Number at B1L1c4; assertion ID = 0
Expected lexeme '+' at B1L1c4; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c4
Codepoint " " 0x0020 accepted as [\s] at B1L1c4
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c4
Reading codepoint "1" 0x0031 at B1L1c5
Codepoint "1" 0x0031 rejected as [\d] at B1L1c5
Codepoint "1" 0x0031 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c5
Discarded lexeme B1L1c4: whitespace
Restarted recognizer at B1L1c5
Expected lexeme Number at B1L1c5; assertion ID = 0
Expected lexeme '+' at B1L1c5; assertion ID = 2
Reading codepoint "1" 0x0031 at B1L1c5
Codepoint "1" 0x0031 accepted as [\d] at B1L1c5
Codepoint "1" 0x0031 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c5
Reading codepoint " " 0x0020 at B1L1c6
Codepoint " " 0x0020 rejected as [\s] at B1L1c6
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c6
Attempting to read lexeme B1L1c5 e4: Number; value="1"
Accepted lexeme B1L1c5 e4: Number; value="1"
Restarted recognizer at B1L1c6
Expected lexeme Number at B1L1c6; assertion ID = 0
Expected lexeme '+' at B1L1c6; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c6
Codepoint " " 0x0020 accepted as [\s] at B1L1c6
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c6
Reading codepoint "2" 0x0032 at B1L1c7
Codepoint "2" 0x0032 rejected as [\d] at B1L1c7
Codepoint "2" 0x0032 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c7
Discarded lexeme B1L1c6: whitespace
Restarted recognizer at B1L1c7
Expected lexeme Number at B1L1c7; assertion ID = 0
Expected lexeme '+' at B1L1c7; assertion ID = 2
Reading codepoint "2" 0x0032 at B1L1c7
Codepoint "2" 0x0032 accepted as [\d] at B1L1c7
Codepoint "2" 0x0032 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c7
Reading codepoint " " 0x0020 at B1L1c8
Codepoint " " 0x0020 rejected as [\s] at B1L1c8
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c8
Attempting to read lexeme B1L1c7 e5: Number; value="2"
Accepted lexeme B1L1c7 e5: Number; value="2"
Restarted recognizer at B1L1c8
Expected lexeme Number at B1L1c8; assertion ID = 0
Expected lexeme '+' at B1L1c8; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c8
Codepoint " " 0x0020 accepted as [\s] at B1L1c8
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c8
Reading codepoint "3" 0x0033 at B1L1c9
Codepoint "3" 0x0033 rejected as [\d] at B1L1c9
Codepoint "3" 0x0033 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c9
Discarded lexeme B1L1c8: whitespace
Restarted recognizer at B1L1c9
Expected lexeme Number at B1L1c9; assertion ID = 0
Expected lexeme '+' at B1L1c9; assertion ID = 2
Reading codepoint "3" 0x0033 at B1L1c9
Codepoint "3" 0x0033 accepted as [\d] at B1L1c9
Codepoint "3" 0x0033 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c9
Reading codepoint " " 0x0020 at B1L1c10
Codepoint " " 0x0020 rejected as [\s] at B1L1c10
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c10
Attempting to read lexeme B1L1c9 e6: Number; value="3"
Accepted lexeme B1L1c9 e6: Number; value="3"
Restarted recognizer at B1L1c10
Expected lexeme Number at B1L1c10; assertion ID = 0
Expected lexeme '+' at B1L1c10; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c10
Codepoint " " 0x0020 accepted as [\s] at B1L1c10
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c10
Reading codepoint "+" 0x002b at B1L1c11
Codepoint "+" 0x002b rejected as [\+] at B1L1c11
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c11
Discarded lexeme B1L1c10: whitespace
Restarted recognizer at B1L1c11
Expected lexeme Number at B1L1c11; assertion ID = 0
Expected lexeme '+' at B1L1c11; assertion ID = 2
Reading codepoint "+" 0x002b at B1L1c11
Codepoint "+" 0x002b accepted as [\+] at B1L1c11
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c11
Attempting to read lexeme B1L1c11 e7: '+'; value="+"
Accepted lexeme B1L1c11 e7: '+'; value="+"
Restarted recognizer at B1L1c12
Expected lexeme Number at B1L1c12; assertion ID = 0
Expected lexeme '+' at B1L1c12; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c12
Codepoint " " 0x0020 accepted as [\s] at B1L1c12
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c12
Reading codepoint "+" 0x002b at B1L1c13
Codepoint "+" 0x002b rejected as [\+] at B1L1c13
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c13
Discarded lexeme B1L1c12: whitespace
Restarted recognizer at B1L1c13
Expected lexeme Number at B1L1c13; assertion ID = 0
Expected lexeme '+' at B1L1c13; assertion ID = 2
Reading codepoint "+" 0x002b at B1L1c13
Codepoint "+" 0x002b accepted as [\+] at B1L1c13
Codepoint "+" 0x002b rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c13
Attempting to read lexeme B1L1c13 e8: '+'; value="+"
Accepted lexeme B1L1c13 e8: '+'; value="+"
Restarted recognizer at B1L1c14
Expected lexeme Number at B1L1c14; assertion ID = 0
Expected lexeme '+' at B1L1c14; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c14
Codepoint " " 0x0020 accepted as [\s] at B1L1c14
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c14
Reading codepoint "1" 0x0031 at B1L1c15
Codepoint "1" 0x0031 rejected as [\d] at B1L1c15
Codepoint "1" 0x0031 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c15
Discarded lexeme B1L1c14: whitespace
Restarted recognizer at B1L1c15
Expected lexeme Number at B1L1c15; assertion ID = 0
Expected lexeme '+' at B1L1c15; assertion ID = 2
Reading codepoint "1" 0x0031 at B1L1c15
Codepoint "1" 0x0031 accepted as [\d] at B1L1c15
Codepoint "1" 0x0031 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c15
Reading codepoint " " 0x0020 at B1L1c16
Codepoint " " 0x0020 rejected as [\s] at B1L1c16
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c16
Attempting to read lexeme B1L1c15 e9: Number; value="1"
Accepted lexeme B1L1c15 e9: Number; value="1"
Restarted recognizer at B1L1c16
Expected lexeme Number at B1L1c16; assertion ID = 0
Expected lexeme '+' at B1L1c16; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c16
Codepoint " " 0x0020 accepted as [\s] at B1L1c16
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c16
Reading codepoint "2" 0x0032 at B1L1c17
Codepoint "2" 0x0032 rejected as [\d] at B1L1c17
Codepoint "2" 0x0032 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c17
Discarded lexeme B1L1c16: whitespace
Restarted recognizer at B1L1c17
Expected lexeme Number at B1L1c17; assertion ID = 0
Expected lexeme '+' at B1L1c17; assertion ID = 2
Reading codepoint "2" 0x0032 at B1L1c17
Codepoint "2" 0x0032 accepted as [\d] at B1L1c17
Codepoint "2" 0x0032 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c17
Reading codepoint " " 0x0020 at B1L1c18
Codepoint " " 0x0020 rejected as [\s] at B1L1c18
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c18
Attempting to read lexeme B1L1c17 e10: Number; value="2"
Accepted lexeme B1L1c17 e10: Number; value="2"
Restarted recognizer at B1L1c18
Expected lexeme Number at B1L1c18; assertion ID = 0
Expected lexeme '+' at B1L1c18; assertion ID = 2
Reading codepoint " " 0x0020 at B1L1c18
Codepoint " " 0x0020 accepted as [\s] at B1L1c18
Codepoint " " 0x0020 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c18
Reading codepoint "4" 0x0034 at B1L1c19
Codepoint "4" 0x0034 rejected as [\d] at B1L1c19
Codepoint "4" 0x0034 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c19
Discarded lexeme B1L1c18: whitespace
Restarted recognizer at B1L1c19
Expected lexeme Number at B1L1c19; assertion ID = 0
Expected lexeme '+' at B1L1c19; assertion ID = 2
Reading codepoint "4" 0x0034 at B1L1c19
Codepoint "4" 0x0034 accepted as [\d] at B1L1c19
Codepoint "4" 0x0034 rejected as [^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}] at B1L1c19
Attempting to read lexeme B1L1c19 e11: Number; value="4"
Accepted lexeme B1L1c19 e11: Number; value="4"
END_OF_OUTPUT

    my $expected_progress_output = [
        [ 1, 1, 0 ],
        [ 2, 0, 11 ],
        [ 2, 1, 0 ],
        [ 3, 0, 11 ],
        [ 4, 0, 11 ],
        [ 4, 1, 10 ],
        [ 5, 0, 11 ],
        [ 5, 3, 0 ],
        [ 5, 3, 6 ],
        [ 6, 1, 0 ]
    ];

# Marpa::R3::Display
# name: Scanless progress() synopsis

    my $progress_output = $recce->progress();

# Marpa::R3::Display::End

    Marpa::R3::Test::is(
        Data::Dumper::Dumper($progress_output),
        Data::Dumper::Dumper($expected_progress_output),
        qq{Scanless progress()}
    );

# Marpa::R3::Display
# name: Scanless g1_pos() synopsis

    my $g1_pos = $recce->g1_pos();

# Marpa::R3::Display::End

    Test::More::is( $g1_pos, 11, qq{Scanless g1_pos()} );

# Marpa::R3::Display
# name: recognizer input_length() synopsis

    my $input_length = $recce->input_length();

# Marpa::R3::Display::End

    Test::More::is( $input_length, 19, qq{Scanless input_length()} );

# Marpa::R3::Display
# name: recognizer input_length() 1-arg synopsis

    $input_length = $recce->input_length(1);

# Marpa::R3::Display::End

    Test::More::is( $input_length, 19, qq{Scanless 1-arg input_length()} );

    # Test translation from G1 location to input stream spans
    my %location_seen = ();
    my @spans         = ();
    for my $g1_location (
        sort { $a <=> $b }
        grep { !$location_seen{$_}++ and $_ != $g1_pos ; } map { $_->[-1] } @{$progress_output}
        )
    {

# Marpa::R3::Display
# name: Scanless g1_to_block_first() synopsis

        my ( $first_block, $first_offset ) =
            $recce->g1_to_block_first($g1_location);

# Marpa::R3::Display::End

# Marpa::R3::Display
# name: Scanless g1_to_block_last() synopsis

        my ( $last_block, $last_offset ) =
            $recce->g1_to_block_last($g1_location);

# Marpa::R3::Display::End

        push @spans, [ $g1_location, $first_block, $first_offset, $last_block, $last_offset];
    }

    # One result for each unique G1 location in progress report
    # Format of each result is
    #    [g1_location, first_block, first_offset, last_block, last_offset]

    my $expected_spans =
          [ [ 0, 1, 0, 1, 0 ], [ 6, 1, 10, 1, 10 ], [ 10, 1, 18, 1, 18 ] ];
    Test::More::is_deeply( \@spans, $expected_spans,
        qq{Scanless g1_to_block_first() & g1_to_block_last() } );

} ## end TEST: for my $test_data (@tests_data)

# vim: expandtab shiftwidth=4: