#!/usr/bin/perl -w
# Copyright 2012 Jeffrey Kegler
# This file is part of Marpa::XS. Marpa::XS is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::XS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::XS. If not, see
# http://www.gnu.org/licenses/.
use 5.010;
use strict;
use warnings;
use charnames ':full';
use Scalar::Util;
use Data::Dumper ();
use English qw( -no_match_vars );
use Test::More ();
use lib 'pperl';
BEGIN {
my $PPI_problem;
CHECK_PPI: {
if ( not eval { require PPI } ) {
$PPI_problem = 'PPI not installed';
last CHECK_PPI;
}
if ( not PPI->VERSION(1.206) ) {
$PPI_problem = 'PPI 1.206 not installed';
}
} ## end CHECK_PPI:
if ($PPI_problem) {
Test::More::plan skip_all => $PPI_problem;
}
else {
Test::More::plan tests => 14;
}
Test::More::use_ok('Marpa::XS');
Test::More::use_ok('Marpa::XS::Perl');
} ## end BEGIN
our @OUTPUT = ();
our %SYMTAB = ( SCALAR => {} );
sub DEBUG_dump {
say {*STDERR} 'DEBUG: ', join "\n", @main::OUTPUT
or die "Cannot print to STDERR: $ERRNO";
say {*STDERR} 'DEBUG: Symbol table: ', Data::Dumper::Dumper( \%SYMTAB )
or die "Cannot print to STDERR: $ERRNO";
return;
} ## end sub DEBUG_dump
# This code is about Perl GRAMMAR.
# If you're writing
# a Perl SEMANTICS, and looking for a place to start,
# you probably don't want to start here.
# The purpose of these semantics is to test the grammar -- no more.
# They are probably good for nothing else.
#
# Here are some of the defects:
#
# 1. Not a 'safe' evaluator for code from untrusted sources.
# 'eval' is used to interpret the string constants.
#
# 2. Most Perl semantics is not implementation and where
# the implementation exists it often is at the toy level.
# Basically, anything not needed to interpret
# Data::Dumper output is ignored.
#
# 3. No optimization. It's fast enough for a test suite.
#
# 4. Etc., etc., etc. You get the idea.
sub coerce_to_R {
my ($tagged) = @_;
my ( $side, $v ) = @{$tagged};
return $side eq 'R' ? $v : ${$v};
}
sub do_term_lstop {
my ( undef, $lstop, $list_tagged ) = @_;
die "Unimplemented lstop: $lstop" if $lstop ne 'bless';
my $list_ref = coerce_to_R($list_tagged);
return [ 'L', \\( bless $list_ref->[0], $list_ref->[1] ) ];
} ## end sub do_term_lstop
# term_hi : term_hi ARROW '{' expr ';' '}' ; term_hi__arrow_hash /* somehref->{bar();} */
sub do_term_hi__arrow_hash {
my ( undef, $term, undef, undef, $element ) = @_;
my $element_ref = coerce_to_R($element);
my $element_ref_type = Scalar::Util::reftype $element_ref;
die "element in term->[element] is not an scalar: $element_ref_type"
if $element_ref_type ne 'SCALAR';
my ( $term_side, $term_ref ) = @{$term};
if ( $term_side eq 'L' ) {
$term_ref = ${$term_ref};
}
if ( ( my $ref_type = Scalar::Util::reftype $term_ref) ne 'REF'
or ( my $ref_ref_type = Scalar::Util::reftype ${$term_ref} ) ne
'HASH' )
{
my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type;
die "term in term->[element] is not an array ref: it is $type";
} ## end if ( ( my $ref_type = Scalar::Util::reftype $term_ref...))
return [ 'L', \\( ${$term_ref}->{ ${$element_ref} } ) ];
} ## end sub do_term_hi__arrow_hash
# term_hi : term_hi ARROW '[' expr ']' ; term_hi__arrow_array /* somearef->[$element] */
sub do_term_hi__arrow_array {
my ( undef, $term, undef, undef, $element ) = @_;
my $element_ref = coerce_to_R($element);
my $element_ref_type = Scalar::Util::reftype $element_ref;
die "element in term->[element] is not an scalar: $element_ref_type"
if $element_ref_type ne 'SCALAR';
my ( $term_side, $term_ref ) = @{$term};
if ( $term_side eq 'L' ) {
$term_ref = ${$term_ref};
}
if ( ( my $ref_type = Scalar::Util::reftype $term_ref) ne 'REF'
or ( my $ref_ref_type = Scalar::Util::reftype ${$term_ref} ) ne
'ARRAY' )
{
my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type;
die "term in term->[element] is not an array ref: it is $type";
} ## end if ( ( my $ref_type = Scalar::Util::reftype $term_ref...))
return [ 'L', \\( ${$term_ref}->[ ${$element_ref} ] ) ];
} ## end sub do_term_hi__arrow_array
# term_hi : scalar '{' expr ';' '}' ; hash_index /* $foo->{bar();} */
# term_hi : term_hi '{' expr ';' '}' ; hash_index_r /* $foo->[bar]->{baz;} */
sub do_hash_index {
my ( undef, $term, undef, $element ) = @_;
my $element_ref = coerce_to_R($element);
my $element_ref_type = Scalar::Util::reftype $element_ref;
die "element in term->[element] is not an scalar: $element_ref_type"
if $element_ref_type ne 'SCALAR';
my ( $term_side, $term_ref ) = @{$term};
if ( $term_side eq 'R' ) {
die 'rvalue term in scalar[element] not implemented';
}
if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'REF'
or ( my $ref_ref_type = Scalar::Util::reftype ${ ${$term_ref} } ) ne
'HASH' )
{
my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type;
die "scalar in scalar[element] is not an hash ref: it is $type";
} ## end if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref...}))
return [ 'L', \\( ${ ${$term_ref} }->{ ${$element_ref} } ) ];
} ## end sub do_hash_index
sub do_array_index {
my ( undef, $term, undef, $element ) = @_;
my $element_ref = coerce_to_R($element);
my $element_ref_type = Scalar::Util::reftype $element_ref;
die "element in term->[element] is not an scalar: $element_ref_type"
if $element_ref_type ne 'SCALAR';
my ( $term_side, $term_ref ) = @{$term};
if ( $term_side eq 'R' ) {
die 'rvalue term in scalar[element] not implemented';
}
if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'REF'
or ( my $ref_ref_type = Scalar::Util::reftype ${ ${$term_ref} } ) ne
'ARRAY' )
{
my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type;
die "scalar in scalar[element] is not an hash ref: it is $type";
} ## end if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref...}))
return [ 'L', \\( ${ ${$term_ref} }->[ ${$element_ref} ] ) ];
} ## end sub do_array_index
sub do_argexpr {
my ( undef, $argexpr, undef, $term ) = @_;
my $argexpr_ref = coerce_to_R($argexpr);
my @result;
given ( Scalar::Util::reftype $argexpr_ref) {
when ('REF') { push @result, ${$argexpr_ref} }
when ('SCALAR') { push @result, ${$argexpr_ref} }
when ('ARRAY') { push @result, @{$argexpr_ref} }
when ('HASH') { push @result, %{$argexpr_ref} }
default { die "Unknown argexpr type: $_" }
} ## end given
my $term_ref = coerce_to_R($term);
given ( Scalar::Util::reftype $term_ref) {
when ('REF') { push @result, ${$term_ref} }
when ('SCALAR') { push @result, ${$term_ref} }
when ('ARRAY') { push @result, @{$term_ref} }
when ('HASH') { push @result, %{$term_ref} }
default { die "Unknown term type: $_" }
} ## end given
return [ 'L', \\@result ];
} ## end sub do_argexpr
# scalar assignment only
sub do_assign {
my ( undef, $lhs, undef, $rhs ) = @_;
my ( $side, $lhs_ref ) = @{$lhs};
my $rhs_ref = coerce_to_R($rhs);
# If the LHS is actually an rvalue,
# it is the name of a variable
# passed up from a 'scalar' rule.
# In this 'toy' semantics, that's how
# variables are "declared".
if ( $side eq 'R' ) {
my $name = ${$lhs_ref};
if ( not defined $name or ref $name ) {
die 'assignment to non-lvalue: ', Data::Dumper::Dumper($name);
}
my $v = ${$rhs_ref};
$SYMTAB{SCALAR}->{$name} = \$v;
$lhs_ref = \( $SYMTAB{SCALAR}->{$name} );
return [ 'L', $lhs_ref ];
} ## end if ( $side eq 'R' )
if ( Scalar::Util::readonly ${ ${$lhs_ref} } ) {
die 'lhs is read only!';
}
${ ${$lhs_ref} } = ${$rhs_ref};
return [ 'L', $lhs_ref ];
} ## end sub do_assign
sub do_THING {
my ( undef, $value ) = @_;
if ( Scalar::Util::looks_like_number($value) ) {
## no critic(BuiltinFunctions::ProhibitStringyEval)
$value = eval $value;
## use critic
} ## end if ( Scalar::Util::looks_like_number($value) )
return [ 'R', \$value ];
} ## end sub do_THING
sub do_anon_array {
my ( undef, undef, $expr ) = @_;
my $value_ref = coerce_to_R($expr);
my @result = ();
given ( Scalar::Util::reftype $value_ref) {
when ('SCALAR') { push @result, ${$value_ref} }
when ('REF') { push @result, ${$value_ref} }
when ('ARRAY') { push @result, @{$value_ref} }
when ('HASH') { push @result, %{$value_ref} }
default { die "Unknown expr type: $_" }
} ## end given
return [ 'L', \\[@result] ];
} ## end sub do_anon_array
sub do_anon_empty_array {
return [ 'L', \\[] ];
}
sub do_anon_hash {
my ( undef, undef, $expr ) = @_;
my $value_ref = coerce_to_R($expr);
my $result;
given ( Scalar::Util::reftype $value_ref) {
when ('REF') {
die 'expr for anon hash cannot be REF'
}
when ('SCALAR') {
die 'expr for anon hash cannot be SCALAR'
}
when ('ARRAY') {
$result = {
@{$value_ref}
}
}
when ('HASH') { $result = \%{$value_ref} }
default { die "Unknown expr type: $_" }
} ## end given
return [ 'R', \$result ];
} ## end sub do_anon_hash
sub do_anon_empty_hash {
return [ 'R', \{} ];
}
# This assume that all 'my' variables
# are just ways to create
# undef lvalue's -- which is how
# Data::Dumper uses them
sub do_term_my {
my $v = undef;
return [ 'L', \\$v ];
}
# Very simplified here --
# References are dereferenced and passed up.
# All scalars not
# already defined are returned as strings.
# It is assumed that they will either be the only
# thing on the LHS of an assignment, or in
# a my declaration. Data::Dumper uses my
# declarations to create undef's so the scalar
# names
# that go up to term_my's will be thrown away.
sub do_scalar {
my ( undef, $dollar, $tagged_ob ) = @_;
my ( $side, $ob_ref ) = @{$tagged_ob};
if ( $side eq 'R' ) {
my $name = ${$ob_ref};
my $scalars = $SYMTAB{SCALAR};
if ( exists $scalars->{$name} ) {
return [ 'L', \$scalars->{$name} ];
}
return [ 'R', \$name ];
} ## end if ( $side eq 'R' )
$ob_ref = ${$ob_ref};
my $ob = ${$ob_ref};
if ( ref $ob ) {
return [ 'L', \$ob ];
}
return [ 'R', $ob ];
} ## end sub do_scalar
sub do_uniop {
my ( undef, $op ) = @_;
die "Unknown uniop: $op" if $op ne 'undef';
return [ 'R', \undef ];
}
# refgen is always an rvalue
sub do_refgen {
my ( undef, undef, $s1 ) = @_;
return [ 'R', \coerce_to_R($s1) ];
}
# prog should always return an rvalue
sub do_prog {
my ( undef, $s1 ) = @_;
return [ 'R', coerce_to_R($s1) ];
}
sub symbol_1 {
my ( undef, $s1 ) = @_;
return $s1;
}
sub symbol_2 {
my ( undef, undef, $s2 ) = @_;
return $s2;
}
sub token_1 {
my ( undef, $a ) = @_;
return [ 'R', \$a ];
}
my %unwrapped = (
and_expr__t => \&symbol_1,
anon_empty_hash => \&do_anon_empty_hash,
anon_hash => \&do_anon_hash,
argexpr__comma => \&symbol_1,
argexpr => \&do_argexpr,
argexpr__t => \&symbol_1,
array_index => \&do_array_index,
array_index_r => \&do_array_index,
block => \&symbol_2,
do_block => \&symbol_2,
expr => \&symbol_1,
hash_index => \&do_hash_index,
hash_index_r => \&do_hash_index,
indirob__block => \&symbol_1,
indirob__WORD => \&token_1,
lineseq__line => \&symbol_2,
line__sideff => \&symbol_2,
listexpr => \&symbol_1,
myterm_scalar => \&symbol_1,
or_expr__t => \&symbol_1,
prog => \&do_prog,
refgen => \&do_refgen,
scalar => \&do_scalar,
sideff => \&symbol_1,
term_addop__t => \&symbol_1,
term_andand__t => \&symbol_1,
term_arrow__t => \&symbol_1,
term_assign => \&do_assign,
term_assign_lstop => \&do_assign,
term_assign__t => \&symbol_1,
term_bitandop__t => \&symbol_1,
term_bitorop__t => \&symbol_1,
term_cond__t => \&symbol_1,
term_dotdot__t => \&symbol_1,
term_eqop__t => \&symbol_1,
term_hi__anon_array => \&do_anon_array,
term_hi__anon_empty_array => \&do_anon_empty_array,
term_hi__arrow_array => \&do_term_hi__arrow_array,
term_hi__arrow_hash => \&do_term_hi__arrow_hash,
term_hi__parens => \&symbol_2,
term_hi__scalar => \&symbol_1,
term_hi__subscripted => \&symbol_1,
term_hi__THING => \&do_THING,
term_increment__t => \&symbol_1,
term_listop__t => \&symbol_1,
term_lstop => \&do_term_lstop,
term_matchop__t => \&symbol_1,
term_mulop__t => \&symbol_1,
term_my => \&do_term_my,
term_notop__t => \&symbol_1,
term_oror__t => \&symbol_1,
term_powop__t => \&symbol_1,
term_relop__t => \&symbol_1,
term_require__t => \&symbol_1,
term_shiftop__t => \&symbol_1,
term__t => \&symbol_1,
term_uminus__t => \&symbol_1,
term_uniop__t => \&symbol_1,
uniop => \&do_uniop,
);
sub gen_closure {
my ( $lhs, $rhs, $action ) = @_;
my $closure = $unwrapped{$action};
die "lhs=$lhs: $closure is not a closure"
if defined $closure and ref $closure ne 'CODE';
return sub {
if ( not defined $closure ) {
die 'No action defined for ',
"$lhs ::= " . ( join q{ }, map { $_ // q{-} } @{$rhs} );
}
my $v = $closure->(@_);
local $Data::Dumper::Terse = 1;
local $Data::Dumper::Indent = 0;
# local $Data::Dumper::Maxdepth = 4;
push @main::OUTPUT,
"$lhs ::= "
. ( join q{ }, map { $_ // q{-} } @{$rhs} ) . q{; }
. Data::Dumper::Dumper( \$v );
$v;
};
} ## end sub gen_closure
my %symbol = ();
my %closure = ();
## Tests from dumper.t
my $parser = Marpa::XS::Perl->new( \&gen_closure );
# Perlcritic cannot figure out that $a and $b are not magic variables
# for a sort comparison
# Trivial
if (1) {
my $a = 1;
test( [$a], [qw(a)] );
}
if (1) {
my @c = ('c');
my $c = \@c;
my $b = {};
my $a = [ 1, $b, $c ];
$b->{a} = $a;
$b->{b} = $a->[1];
$b->{c} = $a->[2];
test( [ $a, $b, $c ], [qw(a b c)] );
} ## end if (1)
if (1) {
my $foo = {
"abc\N{NULL}\'\efg" => "mno\N{NULL}",
'reftest' => \\1,
};
test( [$foo], [qw($foo)] );
} ## end if (1)
if (1) {
my $foo = 5;
my @foo = ( -10, \$foo );
my %foo = ( a => 1, b => \$foo, c => \@foo );
$foo{d} = \%foo;
$foo[2] = \%foo;
test( [ \%foo ], [qw($foo)] );
} ## end if (1)
if (1) {
my @dogs = qw( Fido Wags );
my %kennel = (
First => \$dogs[0],
Second => \$dogs[1],
);
$dogs[2] = \%kennel;
my $mutts = \%kennel;
eval {
test( [ \@dogs, \%kennel, $mutts ], [qw($dogs $kennel $mutts)] );
1;
}
or die "Eval failed: $EVAL_ERROR";
} ## end if (1)
if (1) {
my $a = [];
$a->[1] = \$a->[0];
test( [$a], [qw($a)] );
}
if (1) {
my $a = \\\\\'foo';
my $b = ${ ${$a} };
test( [ $a, $b ], [qw($a $b)] );
}
if (1) {
## no critic (Variables::RequireLocalizedPunctuationVars)
my $b;
my $a = [ { a => \$b }, { b => undef } ];
$b = [ { c => \$b }, { d => \$a } ];
test( [ $a, $b ], [qw($a $b)] );
} ## end if (1)
if (1) {
my $a = [ [ [ [ \\\\\'foo' ] ] ] ];
my $b = $a->[0][0];
my $c = ${ ${ $b->[0][0] } };
test( [ $a, $b, $c ], [qw($a $b $c)] );
} ## end if (1)
if (1) {
my $f = 'pearl';
my $e = [$f];
my $d = { 'e' => $e };
my $c = [$d];
my $b = { 'c' => $c };
my $a = { 'b' => $b };
test( [ $a, $b, $c, $d, $e, $f ], [qw($a $b $c $d $e $f)] );
} ## end if (1)
if (1) {
## no critic (Variables::RequireLocalizedPunctuationVars)
my $a;
$a = \$a;
my $b = [$a];
test( [$b], [qw($b)] );
} ## end if (1)
## Test from Randal Schwartz
if (1) {
my $x = bless { fred => 'flintstone' }, 'x';
my $y = bless \$x, 'y';
test( [ $x, $y ], [qw($x $y)] );
}
## no critic (Subroutines::RequireArgUnpacking)
sub test {
my $input = Data::Dumper->new(@_)->Purity(1)->Sortkeys(1)->Dumpxs;
# Table by type and name of data
# All data is kept as refs.
# For orthogonality, that includes scalars.
%SYMTAB = ();
@OUTPUT = ();
my $value_ref = $parser->parse( \$input );
if ( not defined $value_ref ) {
die 'Perl parse failed';
}
my @pointers = ();
my @names = ();
for my $type ( sort keys %SYMTAB ) {
my $sigil =
$type eq 'SCALAR' ? q{$}
: $type eq 'REF' ? q{$}
: $type eq 'ARRAY' ? q{@}
: $type eq 'HASH' ? q{@}
: q{!};
my $symbols_by_name = $SYMTAB{$type};
for my $name ( sort keys %{$symbols_by_name} ) {
my $ref = $symbols_by_name->{$name};
# The testing convention is to pass scalars directly
$type eq 'SCALAR' and $ref = ${$ref};
push @pointers, $ref;
push @names, "$sigil$name";
} ## end for my $name ( sort keys %{$symbols_by_name} )
} ## end for my $type ( sort keys %SYMTAB )
my $output =
Data::Dumper->new( \@pointers, \@names )->Purity(1)->Sortkeys(1)
->Dumpxs;
Test::More::is( $output, $input );
return;
} ## end sub test
## use critic
1; # In case used as "do" file