#!/usr/bin/perl -w
use strict;
use Parse::Eyapp::YATW;
use Parse::Eyapp::Node;
use Parse::Eyapp::Treeregexp;
use Carp;
use Getopt::Long;
use Pod::Usage;
my $infile;
my $outfile;
my $packagename;
my $prefix = '';
my $syntax = 1;
my $numbers = 1;
my @PERL5LIB; # search path
my $severity = 0; # 0 = Don't check arity. 1 = Check arity. 2 = Check arity and give a warning 3 = ... and croak
GetOptions(
'in=s' => \$infile,
'out=s' => \$outfile,
'mod=s' => \$packagename,
'prefix=s' => \$prefix,
'severity=i' => \$severity,
'syntax!' => \$syntax,
'numbers!' => \$numbers,
'lib=s' => \@PERL5LIB,
'version' => \&version,
'usage' => \&usage,
'help' => \&man,
) or croak usage();
# filename packagename outputfile
($infile) = @ARGV unless defined($infile);
die usage() unless defined($infile);
$infile = "$infile.trg" unless -r $infile;
unless (defined($outfile)) {
if ($infile =~ /(.*)\.trg$/) {
$outfile = "$1.pm";
}
else {
$outfile = "$infile.pm"
}
}
my $treeparser = Parse::Eyapp::Treeregexp->new(
INFILE => $infile,
OUTPUTFILE => $outfile,
PACKAGE => $packagename,
PREFIX => $prefix,
SYNTAX => $syntax,
NUMBERS => $numbers,
PERL5LIB => \@PERL5LIB,
SEVERITY => $severity
);
$treeparser->generate();
###### Support subroutines ######
sub version {
if (defined($Parse::Eyapp::Treeregparser::VERSION)) {
print "Version $Parse::Eyapp::Treeregparser::VERSION\n";
}
else {
print "Unknown version\n";
}
exit;
}
sub usage {
print <<"END_ERR";
Supply the name of a file containing a tree grammar (.trg)
Usage is:
treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \
[-p treeprefix] [-o outputfile] -i filename[.trg]
END_ERR
exit;
}
sub man {
pod2usage(
-exitval => 1,
-verbose => 2
);
}
__END__
=head1 NAME
treereg - Compiler for Tree Regular Expressions
=head1 SYNOPSIS
treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \
[-p treeprefix] [-o outputfile] [-lib /path/to/library/] -i filename[.trg]
treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \
[-p treeprefix] [-lib /path/to/library/] [-o outputfile] filename[.trg]
treereg -v
treereg -h
=head1 OPTIONS
Options can be used both with one dash and double dash.
It is not necessary to write the full name of the option.
A disambiguation prefix suffices.
=over
=item * C<-i[n] filename>
Input file. Extension C<.trg> is assumed if no extension is provided.
=item * C<-o[ut] filename>
Output file. By default is the name of the input file (concatenated with .pm)
=item * C<-m[od] packagename>
Name of the package containing the generated subroutines.
By default is the longest prefix of the input file name that
conforms to the classic definition of integer C<[a-z_A-Z]\w*>.
=item * C<-l[ib] /path/to/library/>
Specifies that C</path/to/library/> will be included in C<@INC>.
Useful when the C<syntax> option is on. Can be inserted as many times as necessary.
=item * C<-p[refix] treeprefix>
Tree nodes automatically generated using C<Parse::Eyapp> are objects blessed
into the name of the production. To avoid crashes the programmer may prefix
the class names with a given prefix when calling the parser; for example:
$self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yyprefix => __PACKAGE__."::")
The C<-prefix treeprefix> option simplifies the process of writing the tree
grammar so that instead of writing with the full names
CLASS::TIMES(CLASS::NUM, $x) and { $NUM->{VAL} == 0) => { $NUM }
it can be written:
TIMES(NUM, $x) and { $NUM->{VAL} == 0) => { $NUM }
=item * C<-n[umbers]>
Produces C<#line> directives.
=item * C<-non[umbers]>
Disable source file line numbering embedded in your parser
=item * C<-sy[ntax]>
Checks that Perl code is syntactically correct.
=item * C<-nosy[ntax]>
Does not check the syntax of Perl code
=item * C<-se[verity] number>
=over 2
=item - 0 = Don't check arity (default). Matching does not check the arity. The actual node
being visited may have more children.
=item - 1 = Check arity. Matching requires the equality of the number of children
and the actual node and the pattern.
=item - 2 = Check arity and give a warning
=item - 3 = Check arity, give a warning and exit
=back
=item * C<-v[ersion]>
Gives the version
=item * C<-u[sage]>
Prints the usage info
=item * C<-h[elp]>
Print this help
=back
=head1 DESCRIPTION
C<Treereg> translates a tree grammar specification
file (default extension C<.trg> describing
a set of tree patterns
and the actions to modify them
using tree-terms like:
TIMES(NUM, $x) and { $NUM->{VAL} == 0) => { $NUM }
which says that wherever an abstract syntax tree representing
the product of a numeric expression with value 0 times
any other kind of expression, the C<TIMES> tree can be substituted by
its left child.
The compiler produces a Perl module containing the subroutines
implementing those sets of pattern-actions.
=head1 EXAMPLE
Consider the following C<eyapp> grammar (see the C<Parse::Eyapp> documentation
to know more about C<Parse::Eyapp> grammars):
----------------------------------------------------------
nereida:~/LEyapp/examples> cat Rule6.yp
%{
use Data::Dumper;
%}
%right '='
%left '-' '+'
%left '*' '/'
%left NEG
%tree
%%
line: exp { $_[1] }
;
exp: %name NUM
NUM
| %name VAR
VAR
| %name ASSIGN
VAR '=' exp
| %name PLUS
exp '+' exp
| %name MINUS
exp '-' exp
| %name TIMES
exp '*' exp
| %name DIV
exp '/' exp
| %name UMINUS
'-' exp %prec NEG
| '(' exp ')' { $_[2] } /* Let us simplify a bit the tree */
;
%%
sub _Error {
die "Syntax error.\n";
}
sub _Lexer {
my($parser)=shift;
$parser->YYData->{INPUT}
or $parser->YYData->{INPUT} = <STDIN>
or return('',undef);
$parser->YYData->{INPUT}=~s/^\s+//;
for ($parser->YYData->{INPUT}) {
s/^([0-9]+(?:\.[0-9]+)?)// and return('NUM',$1);
s/^([A-Za-z][A-Za-z0-9_]*)// and return('VAR',$1);
s/^(.)//s and return($1,$1);
}
}
sub Run {
my($self)=shift;
$self->YYParse( yylex => \&_Lexer, yyerror => \&_Error );
}
----------------------------------------------------------
Compile it using C<eyapp>:
----------------------------------------------------------
nereida:~/LEyapp/examples> eyapp Rule6.yp
nereida:~/LEyapp/examples> ls -ltr | tail -1
-rw-rw---- 1 pl users 4976 2006-09-15 19:56 Rule6.pm
----------------------------------------------------------
Now consider this tree grammar:
----------------------------------------------------------
nereida:~/LEyapp/examples> cat Transform2.trg
%{
my %Op = (PLUS=>'+', MINUS => '-', TIMES=>'*', DIV => '/');
%}
fold: 'TIMES|PLUS|DIV|MINUS':bin(NUM($n), NUM($m))
=> {
my $op = $Op{ref($bin)};
$n->{attr} = eval "$n->{attr} $op $m->{attr}";
$_[0] = $NUM[0];
}
zero_times_whatever: TIMES(NUM($x), .) and { $x->{attr} == 0 } => { $_[0] = $NUM }
whatever_times_zero: TIMES(., NUM($x)) and { $x->{attr} == 0 } => { $_[0] = $NUM }
/* rules related with times */
times_zero = zero_times_whatever whatever_times_zero;
----------------------------------------------------------
Compile it with C<treereg>:
----------------------------------------------------------
nereida:~/LEyapp/examples> treereg Transform2.trg
nereida:~/LEyapp/examples> ls -ltr | tail -1
-rw-rw---- 1 pl users 1948 2006-09-15 19:57 Transform2.pm
----------------------------------------------------------
The following program makes use of both modules C<Rule6.pm>
and C<Transform2.pm>:
----------------------------------------------------------
nereida:~/LEyapp/examples> cat foldand0rule6_3.pl
#!/usr/bin/perl -w
use strict;
use Rule6;
use Parse::Eyapp::YATW;
use Data::Dumper;
use Transform2;
$Data::Dumper::Indent = 1;
my $parser = new Rule6();
my $t = $parser->Run;
print "\n***** Before ******\n";
print Dumper($t);
$t->s(@Transform2::all);
print "\n***** After ******\n";
print Dumper($t);
----------------------------------------------------------
When the program runs with input C<b*(2-2)> produces the following output:
----------------------------------------------------------
nereida:~/LEyapp/examples> foldand0rule6_3.pl
b*(2-2)
***** Before ******
$VAR1 = bless( {
'children' => [
bless( {
'children' => [
bless( { 'children' => [], 'attr' => 'b', 'token' => 'VAR' }, 'TERMINAL' )
]
}, 'VAR' ),
bless( {
'children' => [
bless( { 'children' => [
bless( { 'children' => [], 'attr' => '2', 'token' => 'NUM' }, 'TERMINAL' )
]
}, 'NUM' ),
bless( {
'children' => [
bless( { 'children' => [], 'attr' => '2', 'token' => 'NUM' }, 'TERMINAL' )
]
}, 'NUM' )
]
}, 'MINUS' )
]
}, 'TIMES' );
***** After ******
$VAR1 = bless( {
'children' => [
bless( { 'children' => [], 'attr' => 0, 'token' => 'NUM' }, 'TERMINAL' )
]
}, 'NUM' );
----------------------------------------------------------
See also the section L<Parse::Eyapp/Compiling: More Options> for a more
contrived example.
=head1 SEE ALSO
=over
=item * L<Parse::Eyapp>,
=item * L<eyapptut>
=item * The pdf file in L<http://nereida.deioc.ull.es/~pl/perlexamples/Eyapp.pdf>
=item * L<http://nereida.deioc.ull.es/~pl/perlexamples/section_eyappts.html> (Spanish),
=item * L<eyapp>,
=item * L<treereg>,
=item * L<Parse::yapp>,
=item * yacc(1),
=item * bison(1),
=item * the classic book "Compilers: Principles, Techniques, and Tools" by Alfred V. Aho, Ravi Sethi and
=item * Jeffrey D. Ullman (Addison-Wesley 1986)
=item * L<Parse::RecDescent>.
=back
=head1 AUTHOR
Casiano Rodriguez-Leon
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2006 by Casiano Rodriguez-Leon
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.