# Factory class for continuation threaded backtracking search subs.
=for
XXXXXXXXXXXXXXXx fix naming
{code,sub}_{code,body,sub}_...
Method naming:
config_ deal with configuration info.
code_ return code. core only?
source_ return code by wrapping up codes.
sub_ return sub by wrapping up subs.
_vars emphasizes that _only_ the vars given are temp/let-ized.
Todo:
code_sub(...,@extra_info) => "sub {...}", and for debugging, Sub::Name, etc.
is the code_/source_ distinction clear?
=cut
package Regexp::Engine::Fribble::Noop;
# an empty class for labeling noop functions.
package Regexp::Engine::Fribble::Make;
use strict; use warnings;#HMM - disable in the code evals?
sub new {
my $cls = shift;
bless {},$cls;
}
sub config_changed {
my($o)=@_;
foreach my $k (keys %$o) {
next if $k !~ /^cached_/;
delete $o->{$k};
}
$o;
}
sub config_set {
my($o,$name,$v)=@_;
my $k = "config_$name";
$o->{$k} = $v;
$o->config_changed;
}
# want debugging id. both config_ and cached_.
sub _eval_code {
my($o,$code)=@_;
my $noop = $o->sub_noop;
my $res = eval($code);
die $@.$code if $@;
$res;
}
sub code_call {
my($o,$f,@args)=@_;#HMM - $args?
push(@args,'$noop') if !@args;
$f.'->('.join(',',@args).')';
}
sub code_tailcall {
my($o,$f,@args)=@_;#HMM - $args?
push(@args,'$noop') if !@args;
'@_=('.join(',',@args)."); goto \&{$f};";
}
sub code_tailcall_safely { # goto apparently sometimes kills lexical vars.
my($o,$f,@args)=@_;#HMM - $args?
push(@args,'$noop') if !@args;
#$o->code_tailcall($f,@args);
'return('.$o->code_call($f,@args).');';
}
sub code_noop_is {
my($o,$var)=@_;
"(ref($var) eq 'Regexp::Engine::Fribble::Noop')";
}
sub code_fail {
my($o,$var)=@_;
$var = "undef" if not defined $var;
"return($var)";
}
sub code_fail_is {
my($o,$var)=@_;
"(!defined($var) || (!ref($var) && $var <= 0))";
}
sub code_fail_isnot {
my($o,$var)=@_;
"(!".$o->code_fail_is($var).")";
}
sub code_fail_is_exception {
my($o,$var)=@_;
"(defined($var) && !ref($var) && $var <= 0)";
}
sub code_fail_propagate {
my($o,$var)=@_;
'if('.$o->code_fail_is($var).') {'.$o->code_fail($var).'}';
}
sub code_fail_propagate_exceptions {
my($o,$var)=@_;
'if('.$o->code_fail_is_exception($var).') {'.$o->code_fail($var).'}';
}
sub sub_noop {
my($o)=@_;
if(not exists $o->{cached_noop}) {
my $noop;
my $code = 'bless sub{
my $c = $_[0];
return 1 if '.$o->code_noop_is('$c').';
'.$o->code_tailcall('$c','$noop').";
}, 'Regexp::Engine::Fribble::Noop';";
# $code = 'bless sub{
# my $c = $_[0];
##print STDERR "noop",ref($c),$c;
##Carp::confess;
# return 1 if (ref($c) eq \'Regexp::Engine::Fribble::Noop\');
# @_=($noop); goto &{$c};;
# }, \'Regexp::Engine::Fribble::Noop\';';
# print $code;exit;
$noop = $o->{cached_noop} = eval($code); die $@.$code if $@;
# print STDERR "noop ",$noop,"\n";
}
$o->{cached_noop};
}
sub sub_fail {
my($o,@n)=@_;
my $key = "cached_fail_".(@n?$n[0]:'undef');
if(not exists $o->{$key}) {
my $code = "sub{ ".$o->code_fail(@n)."; }";
$o->{$key} = $o->_eval_code($code);
}
$o->{$key};
}
sub code_subwrap {
my($o,$infosrc,$subsrc)=@_;
return $subsrc;
my $pkg = __PACKAGE__;
"\&${pkg}::_subwrap($infosrc,$subsrc)";
}
sub _subwrap {
my($info,$f)=@_;
print STDERR $info," ",$f,"\n";
$f;
}
sub genstr {
sprintf("%x",int(rand(1_000_000_000)));
}
sub gensym {
my($o,$stem)=@_;
$stem = '$gensym_' if not defined $stem;
$stem.$o->genstr;
}
sub source_temp_vars {
my($o,$vars,$body)=@_;
my $varl = join(",",@$vars);
return ("(do{ $body })") if $varl eq "";
return ("(do{ local($varl)=($varl); $body })");
}
sub source_let_vars {
my($o,$vars,$body,@rest)=@_;
$o->source_alt_vars($vars,[$body],@rest);
}
sub source_alt_vars {
my($o,$vars,$bodies,$code_on_success,$code_on_failure,$code_on_each_failure)=@_;
my $uniq = $o->genstr;
my $v = '$v_'.$uniq;
my $ok = '$ok_'.$uniq;
my $tmpstem = '$tmp'; my $tmpcnt = 0;
my $tmpl = join(",",map{$tmpstem.($tmpcnt++).'_'.$uniq}@$vars);
my $varl = join(",",@$vars);
my $success = "";
my $failure = "";
my $eachfail = "";
$success = "my \$_v_ = $v; $code_on_success" if defined $code_on_success;
$failure = "{ my \$_v_ = $v; $code_on_failure }" if defined $code_on_failure;
$eachfail = "my \$_v_ = $v; $code_on_each_failure" if defined $code_on_each_failure;
#$failure is {}'ed to avoid conflicts with $eachfail's definition of $_v_.
my $vars_setup = "my($tmpl)=($varl);";
my $vars_reset = "($varl)=($tmpl);";
my $vars_local = "local($varl)=($varl);";
my $vars_save = "($tmpl)=($varl);";
my $vars_set = "($varl)=($tmpl);";
if($varl eq '') {
$vars_setup = $vars_reset = $vars_local = $vars_save = $vars_set = "";
}
if(@$bodies == 0) { $bodies = ['undef'] }
if(@$bodies == 1) {
$vars_setup = "my($tmpl);";
$vars_reset = "";
}
my $code = $failure;
for my $body (reverse @$bodies) {
$code = ("\n $v = do{ $body };"
." if(".$o->code_fail_is($v).") { $eachfail $vars_reset $code }"
." else {$ok = 1; $vars_save }");
}
$code = "
(do{
my $v; my $ok; $vars_setup
{ $vars_local $code }
if($ok){ $vars_set $success }
$v
})";
}
sub source_concat {
my($o,$bodies)=@_;
join("",map{"do{ $_ };\n"}@$bodies);
}
sub source_repeat {
my($o,$body,$min,$max,$nongreedy)=@_;
$min = 0 if !defined $min;
my $code = "";
for (1..$min) {$code .= "{ $body }\n";}
}
sub config_backtrack_vars {
my($o,$val)=@_;
$o->config_set('backtrack_vars',$val) if defined $val;
$o->{config_backtrack_vars};
}
sub sub_temp {
my($o,$f)=@_;
my $key = 'cached_sub_temp';
if(not exists $o->{$key}) {
my $code = '
#line 2 "cached_sub_temp"
sub {
my($o,$f)=@_;
'.$o->code_subwrap("'$key'",'sub {
my $c = $_[0];
'.$o->source_temp_vars($o->config_backtrack_vars,
$o->code_tailcall('$f','$c')).'
}').';
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$f);
}
sub sub_let {
my($o,$f)=@_;
my $key = 'cached_sub_let';
if(not exists $o->{$key}) {
my $code = '
sub {
my($o,$f)=@_;
sub {
my $c = $_[0];
'.$o->source_let_vars($o->config_backtrack_vars,
$o->code_call('$f','$c')).'
};
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$f);
}
sub sub_alt {
my($o,$afs)=@_;
my $key = 'cached_sub_alt';
if(not exists $o->{$key}) {
#XXX- "Useless use of private variable in void context at sub_alt line 15"
#end of the let. XXX - let should do something about line numbering?
my $code = '
#line 2 "sub_alt"
sub {
my($o,$afs)=@_;
my @fs = @$afs;
'.$o->code_subwrap("'sub_alt'",'sub {
my $c = $_[0];
for my $f (@fs) {
'.$o->source_let_vars($o->config_backtrack_vars,
$o->code_call('$f','$c'),
'return $_v_',undef,
$o->code_fail_propagate_exceptions('$_v_')).'
}
'.$o->code_fail.';
}').';
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$afs);
}
sub sub_alt_dynamic {#TODO-refactor back into sub_alt?
my($o,$afs)=@_;
my $key = 'cached_sub_alt_dynamic';
if(not exists $o->{$key}) {
my $code = '
#line 2 "sub_alt_dynamic"
sub {
my($o,$afs)=@_;
'.$o->code_subwrap("'sub_alt_dynamic'",'sub {
my $c = $_[0];
my @fs = @$afs;
for my $f (@fs) {
'.$o->source_let_vars($o->config_backtrack_vars,
$o->code_call('$f','$c'),
'return $_v_',undef,
$o->code_fail_propagate_exceptions('$_v_')).'
}
'.$o->code_fail.';
}').';
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$afs);
}
sub sub_concat_v1_broken {
my($o,$afs)=@_;
my $key = 'cached_sub_concat_v1';
if(not exists $o->{$key}) {
my $code = '
#line 2 "sub_concat"
sub {
my($o,$afs)=@_;
my @fs = @$afs;
if(@fs == 0) {
return $o->noop;
}
elsif(@fs == 1) {
return $fs[0];
}
# elsif(@fs == 2) {
# my($f0,$f1)=@fs;
# return sub {
# my $c = $_[0];
# '.$o->code_tailcall('$f0','sub{'.$o->code_tailcall('$f1','$c').'}').';
# };
# }
my $f0 = shift @fs;
@fs = reverse @fs;
my $start = @fs-1;
'.$o->code_subwrap("'sub_concat'",'sub {
my $c = $_[0];
# print STDERR "sub_concat c=$c\n";
my $i = $start; #BZZZZT. would have to be local()ly passed, uniquified.
my $next;
$next = sub {
# print STDERR "sub_concat next fs[$i]=$fs[$i]\n";
if($i > 0) {
'.$o->code_tailcall('$fs[$i--]','$next').';
} else {
'.$o->code_tailcall('$fs[$i]','$c').';
}
};
# print STDERR "sub_concat next: ",$next," f0=$f0\n";
'.$o->code_tailcall_safely('$f0','$next').';
}').';
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$afs);
}
sub sub_concat { # XXX - currently ignoring the code_tailcall abstractions
my($o,$afs)=@_;
my $key = 'cached_sub_concat_v0';
if(not exists $o->{$key}) {
my $code = '
sub {
my($o,$afs)=@_;
my @fs = @$afs;
return $o->noop if @fs == 0;
return $fs[0] if @fs == 1;
my $code1 = ""; my $code2 = "";
my $code0 = "my \$f0 = \$fs[0]; ";
for my $i (reverse(1..$#fs)) {
$code0 .= "my \$f$i = \$fs[$i]; ";
$code1 .= "sub{\@_=";
$code2 .= ";goto \&\$f$i}";
}
my $code = $code0."\n sub{my \$cn = \$_[0]; \@_=".$code1."\$cn".$code2.";goto \&\$f0}\n";
#print $code;
eval($code) || die $@.$code;
}
';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$afs);
}
sub sub_repeat_v0_disposable {
my($o,$f,$min,$max)=@_;
$min = 0 if !defined $min;
$max = (1000**1000**1000) if !defined $max;
$min += 0; $max += 0;
my $key = 'cached_sub_repeat';
if(not exists $o->{$key}) {
my $code = '
#line 2 "cached_sub_repeat"
sub {
my($o,$f,$min,$max)=@_;
'.$o->code_subwrap("'$key'",'sub{
my $c = $_[0];
my $pos_old = -1;
my $i = 0;
my($fmin,$fagain,$frest);
$fmin = sub{
# print "$i $X::pos fmin\n";
if($i >= $min) {
goto &$fagain;
}
# print "$i $X::pos fmin tailcalling f\n";
$i++; '.$o->code_tailcall('$f','$fmin').'
};
$fagain = sub{
# print "$i $X::pos fagain\n";
if($pos_old >= '.$o->config_pos_var.'){
# print "$i $X::pos fagain NO PROGRESS tailcal c\n";
'.$o->code_tailcall('$c').';
}
$pos_old = '.$o->config_pos_var.';
goto &$frest;
};
$frest = sub{
# print "$i $X::pos frest\n";
if($i >= $max) {
# print "$i $X::pos frest BEYOND MAX tailcalling c\n";
'.$o->code_tailcall('$c').';
}
$i++;
# print "$i $X::pos frest about to call f in let\n";
my $v = '.$o->source_let_vars($o->config_backtrack_vars,
$o->code_call('$f','$fagain')).';
# print "$i $X::pos frest back. success? ",defined($v)?"returning $v":"undef","\n";
return $v if '.$o->code_fail_isnot('$v').';
# print "$i $X::pos frest no. tailcalling c\n";
'.$o->code_tailcall('$c').';
};
#'.$o->code_tailcall('$fmin').';
die "bug" if not defined($min) or not defined($max);
# print "start $min $max $X::pos\n";
goto &$fmin;
# $fmin->();
}').'
}';
# print STDERR $code;exit;
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$f,$min,$max);
}
sub sub_repeat {
my($o,$f,$min,$max,$ng)=@_;
$min = 0 if !defined $min;
$max = (1000**1000**1000) if !defined $max;
$min += 0; $max += 0;
$ng = $ng ? 'nongreedy' : 'greedy';
Carp::confess "sub_repeat: min cant be greater than max" if $min > $max;
my $stem = 'cached_sub_repeat_';
my $key = 'cached_sub_repeat_'.$ng;
if(not exists $o->{$key}) {
my $gen_code = sub {
my($first,$second)=@_;
return '
#line 2 "cached_sub_repeat"
sub {
my($o,$f,$min,$max)=@_;
'.$o->code_subwrap("'$key'",'sub{
my $c = $_[0];
my $pos_old = -1;
my $i = 0;
my($fmin,$fagain,$frest);
$fmin = sub{
if($i >= $min) {
goto &$fagain;
}
$i++; '.$o->code_tailcall('$f','$fmin').'
};
$fagain = sub{
if($pos_old >= '.$o->config_pos_var.'){
'.$o->code_tailcall('$c').';
}
$pos_old = '.$o->config_pos_var.';
goto &$frest;
};
$frest = sub{
if($i >= $max) {
'.$o->code_tailcall('$c').';
}
$i++;
my $v = '.$o->source_let_vars($o->config_backtrack_vars,
$o->code_call(@$first)).';
return $v if '.$o->code_fail_isnot('$v').';
'.$o->code_tailcall(@$second).';
};
goto &$fmin;
}').'
}';
};
my $recurse = ['$f','$fagain'];
my $continue= ['$c'];
my $code_g = $gen_code->($recurse,$continue);
my $code_ng = $gen_code->($continue,$recurse);
$o->{$stem.'greedy'} = $o->_eval_code($code_g);
$o->{$stem.'nongreedy'} = $o->_eval_code($code_ng);
}
$o->{$key}($o,$f,$min,$max,$ng);
}
#package Regexp::Engine::Fribble::MakeUtilMixin;
package Regexp::Engine::Fribble::MakeStringSearch;
use base 'Regexp::Engine::Fribble::Make';
sub new {
my $o = shift->SUPER::new(@_);
# $o->config_backtrack_vars([$o->config_pos_var]);
}
sub config_str_var {
my($o,$val)=@_;
$o->config_set('str_var',$val) if defined $val;
$o->{config_str_var};
}
sub config_pos_var {
my($o,$val)=@_;
$o->config_set('pos_var',$val) if defined $val;
$o->{config_pos_var};
}
sub config_match_var {
my($o,$val)=@_;
$o->config_set('match_var',$val) if defined $val;
$o->{config_match_var};
}
sub config_cap_var {
my($o,$val)=@_;
$o->config_set('cap_var',$val) if defined $val;
$o->{config_cap_var};
}
sub config_flag_var {
my($o,$val)=@_;
$o->config_set('flag_var',$val) if defined $val;
$o->{config_flag_var};
}
sub sub_eat_regexp {
my($o,$re)=@_;
Carp::confess if !defined($re);
my $str = $o->config_str_var;
my $pos = $o->config_pos_var;
#print "Making eater for /$re/.\n";
my $code = "
#line 2 \"sub_eat_regexp\"
".$o->code_subwrap("'sub_eat_regexp'","sub {
my \$c = \$_[0];
pos($str) = $pos;
$str =~ /\\G($re)/ or ".$o->code_fail.";
$pos += length(\$1);
".$o->code_tailcall('$c')."
}");
# print \"Trying to match /$re/ against '$str' pos $pos.\\n\"; # bad if $re contains \n
$o->_eval_code($code);
}
sub sub_capture_variant1 {
my($o,$idx,$f)=@_;
my $key = 'cached_capture_variant1';
if(not exists $o->{$key}) {
my $pos = $o->config_pos_var;
my $str = $o->config_str_var;
my $cap = $o->config_cap_var;
my $code = '
#line 2 "sub_capture_variant1"
sub {
my($o,$idx,$f)=@_;
sub {
my $c = $_[0];
my $m = MatchOne->new();
my $from = '.$pos.';
my $close = sub {
my $c0 = $_[0];
my $to = '.$pos.';
$m->match_set(1,substr('.$str.',$from,$to-$from),[],{},$from,$to);
'.$o->code_tailcall('$c0','$c').'
};
return '.$o->source_let_vars([@{$o->config_backtrack_vars},$cap],
$cap.' = [@{'.$cap.'}];
'.$cap.'->[$idx] = $m;
my $v = $f->($close);
$m->match_set_as_failed if '.$o->code_fail_is('$v').';
$v;').'
}
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key}($o,$idx,$f);
}
sub sub_match {
my($o)=@_;
my $key = 'cached_match';
if(not exists $o->{$key}) {
my $pos = $o->config_pos_var;
my $str = $o->config_str_var;
my $cap = $o->config_cap_var;
my $mat = $o->config_match_var;
my $code = '
sub {
my($r,$s)=@_;
my $len = length($s);
for my $start (0..$len) {
local '.$str.' = $s;
local '.$pos.' = $start;
local '.$cap.' = [];
my $m = MatchOne->new();
local '.$mat.' = $m;
my $ok = $r->($noop);
if('.$o->code_fail_isnot('$ok').') {
my $a = '.$cap.';
$m->match_set(1,substr('.$str.',$start,'.$pos.'-$start),
$a,\%{$m},$start,'.$pos.');
return $m;
}
}
return MatchOne->new()->match_set_as_failed;
}';
$o->{$key} = $o->_eval_code($code);
}
$o->{$key};
}
1;
__END__