The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

no warnings 'once';

package Parse::Gnaw;

use 5.006;
use strict;
use warnings FATAL => 'all';

use Data::Dumper;
use Carp ('cluck','confess');
use Storable qw(nstore dclone retrieve);

our $VERSION = '0.601';

# this package doesn't play nice.
# it uses eval("") to create variables in the caller's namespace.
# if the caller uses these variables, they might get warnings about
# some variable only used once.
#
# to disable that warning, we need to do a 
# no warnings 'once';
# except that's lexical and we can't do that here 
#
# on the other hand, if we have an import function and from import call this:
#	warnings->unimport("once");
# then the no warnings gets pulled into the calling package.
#
# We want to use the Exporter.pm module, but we can't "use" it like this:
#	use Exporter
# because that creates a conflict when we declare our own import method.
# So, instead, we put Exporter in our @ISA and then we can define our import method.
# 
# then from inside import, we call export_to_level to do the importing stuff for us
# that Exporter normally does, but now we're doing it inside an import sub
# which then allows us to call 
#	warnings->unimport("once");
#
# see"
# http://perldoc.perl.org/Exporter.html#Exporting-without-using-Exporter%27s-import-method
# and
# http://mail.pm.org/mailman/private/boston-pm/2013-May/014850.html
# From:   	"Ben Tilly" <btilly@gmail.com>
# Date:   	Sat, May 4, 2013 6:46 pm
# If your module has an import method, and in that method calls
# warnings->unimport("once") then the unimport should be lexically
# scoped to where your package was used.


our @ISA = qw(Exporter);
our @EXPORT = qw ( rule predeclare lit call cc notcc thrifty alt );

sub import {
	warnings->unimport("once");
	strict->unimport("vars");
	Parse::Gnaw->export_to_level(1,@_);
}


our $debug=0;

sub format_package{
	my $callerindex = 0;
	while(1){
		my @caller=caller($callerindex++);
		my $package =$caller[0];
		if($package =~ m{Parse::Gnaw}){

		} else {
			return $package;
		}
	}
}

sub format_filename{
	my $callerindex = 0;
	while(1){
		my @caller=caller($callerindex++);
		my $package =$caller[0];
		my $filename=$caller[1];
		if($package =~ m{Parse::Gnaw}){

		} else {
			return $filename;
		}
	}

}

sub format_linenum{
	my $callerindex = 0;
	while(1){
		my @caller=caller($callerindex++);
		my $package =$caller[0];
		my $linenum =$caller[2];
		if($package =~ m{Parse::Gnaw}){

		} else {
			return $linenum;
		}
	}
}


sub eval_string{
	my $string=shift(@_);

	if($debug){
		my @caller=caller(1);
		my $filename=$caller[1];
		my $linenum =$caller[2];
		print "eval_string('$string') called from $filename, line $linenum\n";
	}

	my $eval_return;

	eval($string);
	if($@){
		die $@;
	}

	return $eval_return;
}


sub get_ref_to_rulebook{
	my($package,$createifnotexist)=@_;
	
	if($debug){
		my @caller=caller(1);
		my $filename=$caller[1];
		my $linenum =$caller[2];
		print "called get_ref_to_rulebook($package) from $filename at $linenum\n";
	}

	my $retval = eval_string("\$eval_return = \$".$package."::rulebook;");

	if(defined($retval) and (ref($retval) eq 'HASH')){
		return $retval;
	}

	if($createifnotexist){
		$retval=eval_string('$'.$package."::rulebook={}; \$eval_return = \$".$package.'::rulebook;');
		return $retval;
	}

	return;
}


sub get_ref_to_rulename{
	my($package,$rulename,$createifnotexist)=@_;
	
	if($debug){
		my @caller=caller(1);
		my $filename=$caller[1];
		my $linenum =$caller[2];
		print "called get_ref_to_rulename($package,$rulename) from $filename at $linenum\n";
	}

	my $package_rulename = $package.'::'.$rulename;

	my $retval = eval_string("no warnings 'once'; \$eval_return = \$".$package_rulename.";");

	if(defined($retval) and (ref($retval) eq 'ARRAY')){
		return $retval;
	}

	if($createifnotexist){
		my $ruleref=eval_string('$'.$package_rulename."=[]; \$eval_return = \$".$package_rulename.";");

		# put it in the rulebook.
		my $bookref = get_ref_to_rulebook($package,1);
		$bookref->{$rulename}=$ruleref;

		return $ruleref;
	}

	return;
}



sub process_first_arguments_and_return_hash_ref{

	#print Dumper \@_; warn "process_first_arguments_and_return_hash_ref arguments (above)";


	# first parameter string is same as payload=> key in hash
	# need to know for error checking this:
	# ('myrule',{method=>'rule', payload=>'myrule'})
	# and need to know if hash doesn't exist or doesn't have the key for first parameter.
	# i.e. this
	# ('myrule')
	# needs to return this:
	# {payload=>'myrule'}
	# 
	# on the other hand, if we're processing inputs to the lit() function, then first parameter is the literal
	# lit('a',{payload=>'a'});
	#
	# the methodname is the subroutine name to call to execute the grammar.
	# the methodname for a rule is 'rule'
	# the methodname for a literal is 'lit'
	# the methodname for a thrifty quantifier is 'thrifty'
	#
	# every hash methodname -> methodvalue should have a corresponding
	# payload -> loadvalue combination.
	# for example a literal might look like this: { methodname=>'lit', payload=>'a' };
	# the methodname tells us it is a 'lit'. payload tells us we're looking for the letter 'a'.
	my $methodname=shift(@_);

	# passing in a reference so we can shift data off the array, and affect the array in the caller space as well.
	my $argref = shift(@_);
	unless(ref($argref) eq 'ARRAY'){
		confess "ERROR: called process_first_arguments_and_return_hash_ref, second argument should be an array reference, found $argref instead ";
	}


	my $parm_payload;
	if(not(ref($argref->[0]))){
		$parm_payload=shift(@$argref);
	}

	my $package = format_package();
	my $source_filename = format_filename();
	my $source_linenum  = format_linenum();

	my $info_href;
	if(ref($argref->[0]) eq 'HASH'){
		my $orig_href=shift(@$argref);
		$info_href = dclone $orig_href;
	} else {
		$info_href={};
	}

	if(defined($parm_payload)){
		if(exists($info_href->{payload})){

			# passed in process ( 'a' { payload=>'a' } ) both 'a's must match.
			my $hash_payload=$info_href->{payload};
			unless($parm_payload eq $hash_payload){
				print Dumper $info_href;
				confess "ERROR: process_first_arguments_and_return_hash_ref parm_payload does not equal hash_payload $methodname ($parm_payload ne $hash_payload)";
			}
		} else {
			# passed in parm_payload and do not have hash_payload. So, put it in hash.
			# process ('a', {} ) 
			$info_href->{payload}=$parm_payload;
		} 
	} else {
		# parm_payload is NOT passed in as string, MUST be defined in hash
		# if we don't say process ('a', {} ), then we must say  process ( { payload => 'a' } )
		unless(exists($info_href->{payload})){
			confess("ERROR: process_first_arguments_and_return_hash_ref without providing a $methodname anywhere");
		}

	}

	# handle the rest of the defaults;
	unless(exists($info_href->{package})){$info_href->{package}		=$package;}
	unless(exists($info_href->{filename})){$info_href->{filename}		=$source_filename;;}
	unless(exists($info_href->{linenum})){$info_href->{linenum}		=$source_linenum;}
	unless(exists($info_href->{methodname})){$info_href->{methodname}	=$methodname;}

	return $info_href;
}


sub copy_location_info_and_make_new_hash_ref{
	my($orig_href)=@_;

	# first copy over only the keys we want. this is a one-deep copy.
	# if any hash values point to other references, those need to a deep copy.
	my $one_deep_copy={};

	foreach my $key ('package', 'filename', 'linenum'){
		$one_deep_copy->{$key}=$orig_href->{$key}
	}

	# make a deep copy of just these keys
	my $full_separate_copy = dclone $one_deep_copy;

	return $full_separate_copy;
}



#######################################################################
#######################################################################
#######################################################################
sub rule {
#######################################################################
#######################################################################
#######################################################################
	my $argref=[@_];


	if($debug){print "called rule, \@_ is: "; print Dumper \@_; warn " ";}


	my $info_href=process_first_arguments_and_return_hash_ref('rule', $argref);
	if($debug){print "called rule ";print Dumper $info_href; warn " ";}

	my $rulename = $info_href->{payload};
	my $package  = $info_href->{package};
	my $filename = $info_href->{filename};
	my $linenum  = $info_href->{linenum};

	unless(exists($info_href->{quantifier})){
		$info_href->{quantifier}='';
	}

	if($rulename =~ m{\:\:}){
		confess "ERROR: called rule and passed in a package name rule '$rulename'. Rulenames should not contain '::'";
	}
	
	my $rulebook = get_ref_to_rulebook($package,1);

	if(exists($rulebook->{$rulename})){
		my $oldruleinfo=$rulebook->{$rulename}->[0];

		#print Dumper $oldruleinfo; die;
		my $hash_info = $oldruleinfo->[2];
		my $oldmethod= $hash_info->{methodname};
		if($oldmethod eq 'predeclare'){

		} else {

			warn "warning: redefining rule '$rulename' for package '$package'";

			# element ->[0] in rule array is the 'rule' method. element ->[1] in 'rule' method is the info_href.
			print "original rule: "; print Dumper $rulebook->{$rulename}->[0]->[1]; 
			print "new rule: "; print Dumper $info_href;
		}
	}
	
	my $currentrule = get_ref_to_rulename($package,$rulename,1);

	# empty out the array for the rule
	@$currentrule = ();

	# first index into rule array is a "ruleinfo" marker to indicate info about this rule
	# such as rulename, where it came from, and other information.
	push(@$currentrule, ['rule',$rulename, $info_href]);


	# now go through the subrules and format them properly.
	# a big thing to do is convert strings like 'a' into [ 'lit', 'a', {info} ]
	# this allows a rule to be a lot less verbose.
	my $index=-1;
	while(@$argref){
		$index++;

		if($debug){warn "shifting element of 'rule', index $index";}

		my $subrule=shift(@$argref);

		my $isnumber=0;
		my $isstring=0;
		my $isarray=0;
		my $ishash=0;

		my $ref=ref($subrule);
		if($ref){
			if($ref eq 'ARRAY'){
				$isarray=1;
			} elsif($ref eq 'HASH'){
				$ishash=1;
			}
		}else{
			no warnings 'numeric';
			if($subrule eq $subrule+0){	
				$isnumber=1;	
			}else{
				$isstring=1;
			}
		}

		my @subrules=();

		# if subrule is 'a', convert that to a literal subrule.
		if($isstring){
			if($debug){warn "subrule is string '$subrule'";}

			# make a copy of hash ref and use that for lit() otherwise the original info_href gets tainted.
			my $location_href=copy_location_info_and_make_new_hash_ref($info_href);
			@subrules = lit($subrule,  $location_href);

	
		# if subrule is an array reference, then fill in the hash ref with any info the caller didn't have.
		} elsif($isarray){
			if($debug){warn "subrule is array "; print Dumper $subrule; warn " ";}
			my ($method,$payload,$subinfo)=@$subrule;
			$subinfo=process_first_arguments_and_return_hash_ref($method,[$payload,$subinfo]);
			@subrules = ( [$method,$payload,$subinfo] );

		# if its a hashref, then 'method' key points to a value like 'lit'.
		# and 'lit' will poitn to the actual payload such as 'a'.
		# and the rest will contain whatever location info caller passed in.
		#} elsif($ishash){
		#	my $method=$subrule->{method};
		#	my $payload=$subrule->{$method};
		#	my $subinfo = process_first_arguments_and_return_hash_ref($method,[$payload,$subrule]);
		#	@subrules = ( [$method,$payload,$subinfo] );

		} else {
			print "\n\n\n";
			print Dumper $subrule;
			print "\n\n\n";

			confess "ERROR: dont know how to handle subrule '$subrule' at $filename, $linenum "; 
		}

		push(@$currentrule, @subrules);
	}

	# now fragment the rule so we can reorder how its called:
	fragment_a_rule($currentrule);
}






# each rule may be split up into fragments
# myrule : 'a' 'b' 'c'
# might get split up into
# myrule : 'a' myrule_fragment_2 
# myrule_fragment_2 : 'b' myrule_fragment_3
# myrule_fragment_3 : 'c'
# need to keep count of how many fragments so the rulenames for each fragment is unique
my $rulefragcntr={};

sub fragment_suffix(){'_rulefragment_'}

sub fragment_a_rule{
	my ($currentrule)=@_;
	my @subrules = @$currentrule;
	@$currentrule=();

	my $first_subrule=$subrules[0];

	my $hash_info = $first_subrule->[2];

	my $rulename=$hash_info->{payload};

	while(@subrules){
		my $subrule = shift(@subrules);
		push(@$currentrule, $subrule);

		return if(scalar(@subrules)==0);

		my $subinfo = $subrule->[2];
		my $method  = $subrule->[0];
		my $iscall  = ($method eq 'call') ? 1 : '';

		my $last_subrule= (scalar(@subrules)==0);
		

		if($iscall){

			# its a rule call. 
			# will still call the rule, but want to append a "then_call" attribute
			# everything AFTER the call will go into a new rule fragment.
			# will put a then_call to that fragment.

			my $fragment_suffix=fragment_suffix();
			my $rulename_without_suffix = $rulename;
			$rulename_without_suffix=~s{$fragment_suffix\d+}{};

			my $package = $subinfo->{package};
			my $key_for_rule_fragment_counter = $package.'::'.$rulename_without_suffix;
			unless(exists($rulefragcntr->{$key_for_rule_fragment_counter})){
				$rulefragcntr->{$key_for_rule_fragment_counter}=0;
			}
			$rulefragcntr->{$key_for_rule_fragment_counter}=$rulefragcntr->{$key_for_rule_fragment_counter}+1;
			my $rule_fragment_count = $rulefragcntr->{$key_for_rule_fragment_counter};

			my $fragrulename = $rulename_without_suffix.$fragment_suffix.$rule_fragment_count;

			my $hashforfragcall = copy_location_info_and_make_new_hash_ref( $subinfo );
			delete($hashforfragcall->{payload});

			# now that we've copied the subinfo from the call, 
			# mark the subinfo then_call attribute
			$subinfo->{then_call}=$fragrulename;

			# whatever is left goes into the rule fragment.
			rule($fragrulename, $hashforfragcall, @subrules);
			@subrules=();
		}
	}

}


# lit('hello') will turn into 5 individual lits 'h', 'e', 'l', 'l', 'o'.
# if you don't want to split them up into individual letters, use term() function instead.
#
# FYI: can call this with lit('a', {hashref with location info}); 
#
# could conceivably also call it with lit('a', {lit=>'a'}) though that would be a bit weird.
#
# could even call it with lit({method=>'lit', lit=>'a', etc})
sub lit{
	my $argref=[@_];

	if($debug){print "called lit, \@_ is: "; print Dumper \@_; warn " ";}

	my $info_href=process_first_arguments_and_return_hash_ref('lit', $argref);
	if($debug){print "called lit ";print Dumper $info_href; warn " ";}

	my $lit      = $info_href->{payload};

	my @letters=split(//,$lit);

	my @retval;

	foreach my $letter (@letters){
		my $dclone_href = dclone $info_href;
		push(@retval, ['lit', $letter, $dclone_href]);
	}
	

	return (@retval);
}


sub predeclare {
#######################################################################
#######################################################################
#######################################################################
	my $argref=[@_];


	if($debug){print "called predeclare, \@_ is: "; print Dumper \@_; warn " ";}


	my $info_href=process_first_arguments_and_return_hash_ref('predeclare', $argref);
	if($debug){print "called predeclare ";print Dumper $info_href; warn " ";}

	my $rulename = $info_href->{payload};
	my $package  = $info_href->{package};
	my $filename = $info_href->{filename};
	my $linenum  = $info_href->{linenum};

	unless(exists($info_href->{quantifier})){
		$info_href->{quantifier}='';
	}

	if($rulename =~ m{\:\:}){
		confess "ERROR: called rule and passed in a package name rule '$rulename'. Rulenames should not contain '::'";
	}
	
	my $rulebook = get_ref_to_rulebook($package,1);

	$rulebook->{$rulename}=[['predeclare', $rulename, $info_href]];
}

#######################################################################
#######################################################################
#######################################################################
sub call{
#######################################################################
#######################################################################
#######################################################################
	my $argref=[@_];

	if($debug){print "called 'call', \@_ is: "; print Dumper \@_; warn " ";}

	my $info_href=process_first_arguments_and_return_hash_ref('call', $argref);
	if($debug){print "called 'call' ";print Dumper $info_href; warn " ";}

	my $ruletocall = $info_href->{payload};
	
	my $package = $info_href->{package};

	my $rulebook = get_ref_to_rulebook($package,1);
	unless(exists($rulebook->{$ruletocall})){
		my $msg="WARNING: call passed a nonexistent rulename '$ruletocall'";
		print "$msg\n";
		cluck($msg);

	}

	return ['call', $ruletocall, $info_href ];
}






my $thriftycounter=0;

#######################################################################
#######################################################################
#######################################################################
sub thrifty{
#######################################################################
#######################################################################
#######################################################################
	my $argref=[@_];


	#print "called thrifty ";print Dumper \@_; warn " ";
	my $min_max=pop(@$argref);

	if(ref($min_max) eq 'HASH'){
		# do nothing, assume user passed in {min=>8, max=>33}
	} else {
		# user didn't pass in a hash. Create a hash, cause we need a hash.
		my ($min, $max);

		# if its an array, assume its [min,max]
		if(ref($min_max) eq 'ARRAY'){
			($min,$max)=@$min_max;

		# else, its a string, try to deal with various formats 
		}else{
			if($min_max =~ m{\A(\d+)?\,(\d+)?\Z}){
				($min,$max)=($1,$2);	
			} elsif($min_max eq '+'){
				($min,$max)=(1,-999);
			} elsif($min_max eq '*'){
				($min,$max)=(0,-999);
			} elsif($min_max eq '?'){
				($min,$max)=(0,1);
			} else {
				die "ERROR: thrifty can't handle min-max indicator '$min_max' ";
			}
		}

		# now that we've extracted min/max from array or string, create a hash.
		$min_max={min=>$min,max=>$max};
	}

	my $thrifty_rule_name = "thrifty_".(++$thriftycounter);

	$min_max->{quantifier}='thrifty';

	# now call the process function to fill in info that might be missing, like filename and linenum.
	# this call needs min_max to be a hash.
	$min_max=process_first_arguments_and_return_hash_ref('rule', [$thrifty_rule_name, $min_max]);

	if($debug){print "in THRIFTY "; print Dumper $min_max;  warn " ";}

	# remainder of @_ is the stuff for the thrifty rule.
	# create a new rule and put the quantify stuff in it.
	rule($thrifty_rule_name, $min_max, @$argref);

	# return a call to newly created thrifty rule.
	my $retval =  call($thrifty_rule_name, $min_max);

	return $retval;
}
















sub cc{
	my $argref=[@_];

	my $info_href=process_first_arguments_and_return_hash_ref('cc', $argref);
	if($debug){print "called cc ";print Dumper $info_href; warn " ";}

	# charclass is a string of characters in the class, such as 'aeiou'.
	# want to turn that into a hashref where the keys are the characters 
	# value doesn't matter, just make it a count

	my $charclass=$info_href->{payload};

	my $hash_of_letters={};
	my @chars = split(//,$charclass);
	foreach my $char (@chars){
		$hash_of_letters->{$char}++;
		if($hash_of_letters->{$char}>1){
			print Dumper $info_href;
			die "ERROR: called cc with duplicates in charclass '$charclass', duplicate is '$char'";		}
	}

	$info_href->{hash_of_letters}=$hash_of_letters;

	my $retval = ['cc', $charclass, $info_href ];

	print Dumper $retval;

	return $retval;
}

sub notcc{
	my $argref=[@_];

	my $info_href=process_first_arguments_and_return_hash_ref('notcc', $argref);
	if($debug){print "called notcc ";print Dumper $info_href; warn " ";}

	# charclass is a string of characters in the class, such as 'aeiou'.
	# want to turn that into a hashref where the keys are the characters 
	# value doesn't matter, just make it a count

	my $charclass=$info_href->{payload};

	my $hash_of_letters={};
	my @chars = split(//,$charclass);
	foreach my $char (@chars){
		$hash_of_letters->{$char}++;
		if($hash_of_letters->{$char}>1){
			print Dumper $info_href;
			die "ERROR: called notcc with duplicates in charclass '$charclass', duplicate is '$char'";		}
	}

	$info_href->{hash_of_letters}=$hash_of_letters;

	my $retval = ['notcc', $charclass, $info_href ];

	print Dumper $retval;

	return $retval;
}




my $alternatecounter=0;

# alt( [ 'a','b'], ['c','d'], ['e','f'] );
sub alt{
	my $argref=['alternates', @_];

	my $info_href=process_first_arguments_and_return_hash_ref('alt',  $argref);
	if($debug){print "called alternation ";print Dumper $info_href; warn " ";}

	$info_href->{alternates}=[];

	while(@$argref){
	 
		my $arr_ref=shift(@$argref);

		# should pass in a list of array refs. turn each one into a rule.
		unless(ref($arr_ref) eq 'ARRAY'){
			confess "ERROR: alternate should be passed a list of array references, each containing an alternate rule description. got '$arr_ref' instead";
		}

		my $alternate_rule_name = "alternate_".(++$alternatecounter);
			
		push(@{$info_href->{alternates}}, $alternate_rule_name);

		# create a new rule and put the quantify stuff in it.
		rule($alternate_rule_name, @$arr_ref);


	}


	my $retval = ['alt', 'alternates', $info_href ];

	print Dumper $retval;

	return $retval;
}



1; # End of Parse::Gnaw