The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl
# $Id: build_parser.pl,v 1.2 2013/01/04 23:03:42 Paulo Exp $

#------------------------------------------------------------------------------
# Create Parse::FSM::Parse to parse a yacc-like grammar
#------------------------------------------------------------------------------

use strict;
use warnings;

use Parse::FSM;
use File::Slurp;
use File::Basename;

@ARGV == 2 or die "Usage: ",basename($0)," MODULE FILE\n";
my($module, $file) = @ARGV; 

my $fsm = Parse::FSM->new;

#------------------------------------------------------------------------------
# prolog
$fsm->prolog(q{
use Data::Dump 'dump';
use Text::Balanced 'extract_quotelike', 'extract_codeblock';

my $uid = 1;

});

#------------------------------------------------------------------------------
# epilog
$fsm->epilog(q{
# access the FSM object
sub fsm { return shift->user->{fsm} }

# read the given input string
sub from {
	my($self, $line) = @_;
	
	$self->input(sub {
		for ($line) {
			/\G(?:\s+|#.*)+/gc;		# skip blanks and comments
			
			/\G([a-z]\w*)/gci and do {
				return [NAME => $1];
			};
			/\G(?=["'])/gc and do {
				my $start_pos = pos();
				my($quoted_string, $rest) = extract_quotelike($_);
				if (defined $quoted_string) {
					my $token = eval($quoted_string); ## no critic
					if (! $@) {
						pos() = length() - length($rest);
						return [TOKEN => $token];
					}
				}
				
				# could not parse quoted string, die
				$rest = substr($_, $start_pos, 100);
				die "Cannot parse quoted string at ", dump($rest), "\n";
			};
			/\G(?=[\{])/gc and do {
				my $start_pos = pos();
				my($code_block, $rest) = extract_codeblock($_);
				if (defined $code_block) {
					pos() = length() - length($rest);
					return [CODE => $code_block];
				}
				
				# could not parse quoted string, die
				$rest = substr($_, $start_pos, 100);
				die "Cannot parse code block at ", dump($rest), "\n";
			};
			/\G(%\w+)/gc and do {					# directives
				return [$1 => $1];
			};
			/\G(<\+)\s*([^>\s]+)\s*>/gc and do {	# list quantifier
				return [$1 => $2];					# ['<+' => ',']
			};
			/\G(<\w+)/gc and do {					# directive
				return [$1 => $1];					# ['<start' => '<start']
			};
			/\G(.)/gc and do {
				return [$1 => $1];
			};
			return;	# end of input
		}
	});
	return;
}

});

#------------------------------------------------------------------------------
# program : list of rules
$fsm->add_rule('program', '[prolog]?', '[statement]+', '[epilog]?', '', '{}');

# prolog
$fsm->add_rule('prolog', '[code_unbraced]', q{{
				my $code = $item[0];
				$self->fsm->prolog($code);
				return;
			}});
				
# epilog
$fsm->add_rule('epilog', '[code_unbraced]', q{{
				my $code = $item[0];
				$self->fsm->epilog($code);
				return;
			}});
				
# code section with braces removed
$fsm->add_rule('code_unbraced', '[code]', q{{
				my $code = $item[0];
				$code =~ s/\A\s*\{//;	# remove start ...
				$code =~ s/\}\s*\z//;	# ... and end braces
				return $code;
			}});

# statement : rule | directive ;
$fsm->add_rule('statement', '[rule]', '{}');
$fsm->add_rule('statement', '[directive]', '{}');

# rule : name sentence<+|> ';'
$fsm->add_rule('rule', '[name]', ':', '[sentence]<+|>', ';', q{{
				my $name = $item[0];
				my $sentences = $item[2];
				for my $sentence (@$sentences) {
					$self->fsm->add_rule($name, @$sentence);
				}
				return;
			}});

# sentence : element+ action
$fsm->add_rule('sentence', '[element]+', '[action]', q{{
				my $elements = $item[0];
				my $action   = $item[1];
				return [@$elements, $action];
			}});

# element : token | subrule | <eof> | <leftop> | <rightop>
$fsm->add_rule('element', '[token]',			'{ return $item[0] }');
$fsm->add_rule('element', '[subrule]',			'{ return $item[0] }');
$fsm->add_rule('element', '<eof', '>',			'{ return ""       }');

# <list> separated by tokens
$fsm->add_rule('element', '<list', ':', 
					'[subrule_name]', '[token]', '[subrule_name]', '>',
			q{{
				my $operand1  = $item[2];
				my $operator  = $item[3];
				my $operand2  = $item[4];
				my $name      = "_anon".($uid++);
				my $name_opt  = "_anon".($uid++);
				
				# create rule for repetion of (operator operand2)
				$self->fsm->add_rule($name_opt, 
									$operator, "[$operand2]",
									'{return [$item[0][0], $item[1]]}');
				
				# create rule for : operand1 (opt_rule)*
				$self->fsm->add_rule($name, 
									"[$operand1]", "[$name_opt]*",
									q{{
										my @ret = ($item[0]);
										for (@{$item[1]}) {
											push @ret, @$_;
										}
										return \@ret;
									}});
				
				# return rule name
				return "[$name]";
			}});

# <list> separated by rules
$fsm->add_rule('element', '<list', ':', 
					'[subrule_name]', '[subrule_name]', '[subrule_name]', '>',
			q{{
				my $operand1  = $item[2];
				my $operator  = $item[3];
				my $operand2  = $item[4];
				my $name      = "_anon".($uid++);
				my $name_opt  = "_anon".($uid++);
				
				# create rule for repetion of (operator operand2)
				$self->fsm->add_rule($name_opt, 
									"[$operator]", "[$operand2]",
									'{return [$item[0], $item[1]]}');
				
				# create rule for : operand1 (opt_rule)*
				$self->fsm->add_rule($name, 
									"[$operand1]", "[$name_opt]*",
									q{{
										my @ret = ($item[0]);
										for (@{$item[1]}) {
											push @ret, @$_;
										}
										return \@ret;
									}});
				
				# return rule name
				return "[$name]";
			}});

# subrule
$fsm->add_rule('subrule', '[subrule_name]', '[quantifier]?', q{{
				my $name  = $item[0];
				my $quant = $item[1];
				my $ret = "[$name]";
				$ret .= $quant->[0] if @$quant;
				return $ret;
			}});

# subrule name : either name or () surrounded anonymous rule
$fsm->add_rule('subrule_name', '[name]', '{ return $item[0] }');
$fsm->add_rule('subrule_name', '(', '[sentence]<+|>', ')', q{{
				my $name = "_anon".($uid++);
				my $sentences = $item[1];
				for my $sentence (@$sentences) {
					$self->fsm->add_rule($name, @$sentence);
				}
				return $name;
			}});

# quantifier
$fsm->add_rule('quantifier', '?',  '{return $item[0][1]}');
$fsm->add_rule('quantifier', '*',  '{return $item[0][1]}');
$fsm->add_rule('quantifier', '+',  '{return $item[0][1]}');
$fsm->add_rule('quantifier', '<+', '{return "<+$item[0][1]>"}');

# action
$fsm->add_rule('action', '[code]?',	q{{
				if (@{$item[0]}) {			# code block supplied
					return $item[0][0];		
				}
				else {						# default action
					return q{{
								if (@item == 1) {	# special case: one element
									return $item[0];	# drop one array level
								}
								else {
									return \@item;
								}
							}};
				}
			}});

# directive
$fsm->add_rule('directive', '<start', ':', '[name]', '>', q{{
				my $name = $item[2];
				$self->fsm->start_rule($name);
				return;
			}});

# terminals
$fsm->add_rule('name', 		'NAME', 	'{ return $item[0][1] }');
$fsm->add_rule('token',		'TOKEN',	'{ return $item[0][1] }');
$fsm->add_rule('code',		'CODE',		'{ return $item[0][1] }');

#------------------------------------------------------------------------------
# write_module
$fsm->write_module($module, $file, 'hidden_module');