The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Factory class for continuation threaded backtracking search subs.
=for

XXXXXXXXXXXXXXXx fix naming
{code,sub}_{code,body,sub}_...

Method naming:
 config_ deal with configuration info.
 code_ return code.  core only?
 source_ return code by wrapping up codes.
 sub_ return sub by wrapping up subs.
 _vars emphasizes that _only_ the vars given are temp/let-ized.

Todo:
 code_sub(...,@extra_info) => "sub {...}", and for debugging, Sub::Name, etc.
 is the code_/source_ distinction clear?
 
=cut

package Regexp::Engine::Fribble::Noop;
# an empty class for labeling noop functions.

package Regexp::Engine::Fribble::Make;
use strict; use warnings;#HMM - disable in the code evals?

sub new {
  my $cls = shift;
  bless {},$cls;
}
sub config_changed {
  my($o)=@_;
  foreach my $k (keys %$o) {
    next if $k !~ /^cached_/;
    delete $o->{$k};
  }
  $o;
}
sub config_set {
  my($o,$name,$v)=@_;
  my $k = "config_$name";
  $o->{$k} = $v;
  $o->config_changed;
}
# want debugging id.  both config_ and cached_.

sub _eval_code {
  my($o,$code)=@_;
  my $noop = $o->sub_noop;
  my $res = eval($code);
  die $@.$code if $@;
  $res;
}
sub code_call {
  my($o,$f,@args)=@_;#HMM - $args?
  push(@args,'$noop') if !@args;
  $f.'->('.join(',',@args).')';
}
sub code_tailcall {
  my($o,$f,@args)=@_;#HMM - $args?
  push(@args,'$noop') if !@args;
  '@_=('.join(',',@args)."); goto \&{$f};";
}
sub code_tailcall_safely { # goto apparently sometimes kills lexical vars.
  my($o,$f,@args)=@_;#HMM - $args?
  push(@args,'$noop') if !@args;
  #$o->code_tailcall($f,@args);
  'return('.$o->code_call($f,@args).');';
}
sub code_noop_is {
  my($o,$var)=@_;
  "(ref($var) eq 'Regexp::Engine::Fribble::Noop')";
}
sub code_fail {
  my($o,$var)=@_;
  $var = "undef" if not defined $var;
  "return($var)";
}
sub code_fail_is {
  my($o,$var)=@_;
  "(!defined($var) || (!ref($var) && $var <= 0))";
}
sub code_fail_isnot {
  my($o,$var)=@_;
  "(!".$o->code_fail_is($var).")";
}
sub code_fail_is_exception {
  my($o,$var)=@_;
  "(defined($var) && !ref($var) && $var <= 0)";
}
sub code_fail_propagate {
  my($o,$var)=@_;
  'if('.$o->code_fail_is($var).') {'.$o->code_fail($var).'}';
}
sub code_fail_propagate_exceptions {
  my($o,$var)=@_;
  'if('.$o->code_fail_is_exception($var).') {'.$o->code_fail($var).'}';
}

sub sub_noop {
  my($o)=@_;
  if(not exists $o->{cached_noop}) {
    my $noop;
    my $code = 'bless sub{
      my $c = $_[0];
      return 1 if '.$o->code_noop_is('$c').';
      '.$o->code_tailcall('$c','$noop').";
    }, 'Regexp::Engine::Fribble::Noop';";
#    $code = 'bless sub{
#      my $c = $_[0];
##print STDERR "noop",ref($c),$c;
##Carp::confess;
#      return 1 if (ref($c) eq \'Regexp::Engine::Fribble::Noop\');
#      @_=($noop); goto &{$c};;
#    }, \'Regexp::Engine::Fribble::Noop\';';
#    print $code;exit;
    $noop = $o->{cached_noop} = eval($code); die $@.$code if $@;
#    print STDERR "noop ",$noop,"\n";
  }
  $o->{cached_noop};
}
sub sub_fail {
  my($o,@n)=@_;
  my $key = "cached_fail_".(@n?$n[0]:'undef');
  if(not exists $o->{$key}) {
    my $code = "sub{ ".$o->code_fail(@n)."; }";
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key};
}

sub code_subwrap {
  my($o,$infosrc,$subsrc)=@_;
  return $subsrc;
  my $pkg = __PACKAGE__;
  "\&${pkg}::_subwrap($infosrc,$subsrc)";
}
sub _subwrap {
  my($info,$f)=@_;
  print STDERR $info," ",$f,"\n";
  $f;
}

sub genstr {
  sprintf("%x",int(rand(1_000_000_000)));
}
sub gensym {
  my($o,$stem)=@_;
  $stem = '$gensym_' if not defined $stem;
  $stem.$o->genstr;
}

sub source_temp_vars {
  my($o,$vars,$body)=@_;
  my $varl = join(",",@$vars);
  return ("(do{ $body })") if $varl eq "";
  return ("(do{ local($varl)=($varl); $body })");
}
sub source_let_vars {
  my($o,$vars,$body,@rest)=@_;
  $o->source_alt_vars($vars,[$body],@rest);
}
sub source_alt_vars {
  my($o,$vars,$bodies,$code_on_success,$code_on_failure,$code_on_each_failure)=@_;
  my $uniq = $o->genstr;
  my $v  = '$v_'.$uniq;
  my $ok = '$ok_'.$uniq;
  my $tmpstem = '$tmp'; my $tmpcnt = 0;
  my $tmpl = join(",",map{$tmpstem.($tmpcnt++).'_'.$uniq}@$vars);
  my $varl = join(",",@$vars);

  my $success = "";
  my $failure = "";
  my $eachfail = "";
  $success = "my \$_v_ = $v; $code_on_success" if defined $code_on_success;
  $failure = "{ my \$_v_ = $v; $code_on_failure }" if defined $code_on_failure;
  $eachfail = "my \$_v_ = $v; $code_on_each_failure" if defined $code_on_each_failure;
  #$failure is {}'ed to avoid conflicts with $eachfail's definition of $_v_.

  my $vars_setup = "my($tmpl)=($varl);";
  my $vars_reset = "($varl)=($tmpl);";
  my $vars_local = "local($varl)=($varl);";
  my $vars_save  = "($tmpl)=($varl);";
  my $vars_set   = "($varl)=($tmpl);";
  if($varl eq '') {
    $vars_setup = $vars_reset = $vars_local = $vars_save = $vars_set = "";
  }
  if(@$bodies == 0) { $bodies = ['undef'] }
  if(@$bodies == 1) {
    $vars_setup = "my($tmpl);";
    $vars_reset = "";
  }

  my $code = $failure;
  for my $body (reverse @$bodies) {
    $code = ("\n $v = do{ $body };"
            ." if(".$o->code_fail_is($v).") { $eachfail $vars_reset $code }"
            ." else {$ok = 1; $vars_save }");
  }  
  $code = "
  (do{
    my $v; my $ok; $vars_setup
    { $vars_local $code }
    if($ok){ $vars_set $success }
    $v
  })";
}  
sub source_concat {
  my($o,$bodies)=@_;
  join("",map{"do{ $_ };\n"}@$bodies);
}
sub source_repeat {
  my($o,$body,$min,$max,$nongreedy)=@_;
  $min = 0 if !defined $min;
  my $code = "";
  for (1..$min) {$code .= "{ $body }\n";}    
        
}


sub config_backtrack_vars {
  my($o,$val)=@_;
  $o->config_set('backtrack_vars',$val) if defined $val;
  $o->{config_backtrack_vars};
}

sub sub_temp {
  my($o,$f)=@_;
  my $key = 'cached_sub_temp';
  if(not exists $o->{$key}) {
    my $code = '
#line 2 "cached_sub_temp"
    sub {
      my($o,$f)=@_;
      '.$o->code_subwrap("'$key'",'sub {
        my $c = $_[0];
        '.$o->source_temp_vars($o->config_backtrack_vars,
                               $o->code_tailcall('$f','$c')).'
      }').';
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$f);
}
sub sub_let {
  my($o,$f)=@_;
  my $key = 'cached_sub_let';
  if(not exists $o->{$key}) {
    my $code = '
    sub {
      my($o,$f)=@_;
      sub {
        my $c = $_[0];
        '.$o->source_let_vars($o->config_backtrack_vars,
                              $o->code_call('$f','$c')).'
      };
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$f);
}
sub sub_alt {
  my($o,$afs)=@_;
  my $key = 'cached_sub_alt';
  if(not exists $o->{$key}) {
#XXX- "Useless use of private variable in void context at sub_alt line 15"
#end of the let.  XXX - let should do something about line numbering?
    my $code = '
#line 2 "sub_alt"
    sub {
      my($o,$afs)=@_;
      my @fs = @$afs;
      '.$o->code_subwrap("'sub_alt'",'sub {
        my $c = $_[0];
        for my $f (@fs) {
          '.$o->source_let_vars($o->config_backtrack_vars,
                                $o->code_call('$f','$c'),
                                'return $_v_',undef,
                                $o->code_fail_propagate_exceptions('$_v_')).'
        }
        '.$o->code_fail.';
      }').';
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$afs);
}
sub sub_alt_dynamic {#TODO-refactor back into sub_alt?
  my($o,$afs)=@_;
  my $key = 'cached_sub_alt_dynamic';
  if(not exists $o->{$key}) {
    my $code = '
#line 2 "sub_alt_dynamic"
    sub {
      my($o,$afs)=@_;
      '.$o->code_subwrap("'sub_alt_dynamic'",'sub {
        my $c = $_[0];
        my @fs = @$afs;
        for my $f (@fs) {
          '.$o->source_let_vars($o->config_backtrack_vars,
                                $o->code_call('$f','$c'),
                                'return $_v_',undef,
                                $o->code_fail_propagate_exceptions('$_v_')).'
        }
        '.$o->code_fail.';
      }').';
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$afs);
}

sub sub_concat_v1_broken {
  my($o,$afs)=@_;
  my $key = 'cached_sub_concat_v1';
  if(not exists $o->{$key}) {
    my $code = '
#line 2 "sub_concat"
    sub {
      my($o,$afs)=@_;
      my @fs = @$afs;
      if(@fs == 0) {
        return $o->noop;
      }
      elsif(@fs == 1) {
        return $fs[0];
      }
      # elsif(@fs == 2) {
      #   my($f0,$f1)=@fs;
      #   return sub {
      #     my $c = $_[0];
      #     '.$o->code_tailcall('$f0','sub{'.$o->code_tailcall('$f1','$c').'}').';
      #   };
      # }
      my $f0 = shift @fs;
      @fs = reverse @fs;
      my $start = @fs-1;
      '.$o->code_subwrap("'sub_concat'",'sub {
        my $c = $_[0];
#	print STDERR "sub_concat c=$c\n";
        my $i = $start; #BZZZZT. would have to be local()ly passed, uniquified.
        my $next;
        $next = sub {
#  	  print STDERR "sub_concat next fs[$i]=$fs[$i]\n";
          if($i > 0) {
            '.$o->code_tailcall('$fs[$i--]','$next').';
          } else {
            '.$o->code_tailcall('$fs[$i]','$c').';
          }
        };
#	print STDERR "sub_concat next: ",$next," f0=$f0\n";
        '.$o->code_tailcall_safely('$f0','$next').';
    }').';
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$afs);
}
sub sub_concat { # XXX - currently ignoring the code_tailcall abstractions
  my($o,$afs)=@_;
  my $key = 'cached_sub_concat_v0';
  if(not exists $o->{$key}) {
    my $code = '
      sub {
        my($o,$afs)=@_;
	my @fs = @$afs;
	return $o->noop if @fs == 0;
	return $fs[0] if @fs == 1;
	my $code1 = ""; my $code2 = "";
	my $code0 = "my \$f0 = \$fs[0]; ";
	for my $i (reverse(1..$#fs)) {
	  $code0 .= "my \$f$i = \$fs[$i]; ";
	  $code1 .= "sub{\@_=";
	  $code2 .= ";goto \&\$f$i}";
	}
	my $code = $code0."\n sub{my \$cn = \$_[0]; \@_=".$code1."\$cn".$code2.";goto \&\$f0}\n";
	#print $code;
	eval($code) || die $@.$code;
      }
    ';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$afs);
}

sub sub_repeat_v0_disposable {
  my($o,$f,$min,$max)=@_;
  $min = 0 if !defined $min;
  $max = (1000**1000**1000) if !defined $max;
  $min += 0; $max += 0;
  my $key = 'cached_sub_repeat';
  if(not exists $o->{$key}) {
    my $code = '
#line 2 "cached_sub_repeat"
    sub {
      my($o,$f,$min,$max)=@_;
      '.$o->code_subwrap("'$key'",'sub{
        my $c = $_[0];
        my $pos_old = -1;
        my $i = 0;
        my($fmin,$fagain,$frest);
        $fmin = sub{
#  print  "$i $X::pos fmin\n";
          if($i >= $min) {
            goto &$fagain;
          }
#  print  "$i $X::pos fmin tailcalling f\n";
          $i++; '.$o->code_tailcall('$f','$fmin').'
        };
        $fagain = sub{
#  print  "$i $X::pos fagain\n";
          if($pos_old >= '.$o->config_pos_var.'){
#  print  "$i $X::pos fagain NO PROGRESS tailcal c\n";
            '.$o->code_tailcall('$c').';
          }
          $pos_old = '.$o->config_pos_var.';
          goto &$frest;
        };
        $frest = sub{
#  print  "$i $X::pos frest\n";
          if($i >= $max) {
#  print  "$i $X::pos frest BEYOND MAX tailcalling c\n";
            '.$o->code_tailcall('$c').';
          }
          $i++;
#  print  "$i $X::pos frest about to call f in let\n";
          my $v = '.$o->source_let_vars($o->config_backtrack_vars,
                                      $o->code_call('$f','$fagain')).';
#  print  "$i $X::pos frest back.   success? ",defined($v)?"returning $v":"undef","\n";
          return $v if '.$o->code_fail_isnot('$v').';
#  print  "$i $X::pos frest no. tailcalling c\n";
          '.$o->code_tailcall('$c').';
        };
        #'.$o->code_tailcall('$fmin').';
  die "bug" if not defined($min) or not defined($max);
#  print  "start $min $max  $X::pos\n";
        goto &$fmin;
#        $fmin->();
      }').'
    }';
#  print STDERR $code;exit;
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$f,$min,$max);
}
sub sub_repeat {
  my($o,$f,$min,$max,$ng)=@_;
  $min = 0 if !defined $min;
  $max = (1000**1000**1000) if !defined $max;
  $min += 0; $max += 0;
  $ng = $ng ? 'nongreedy' : 'greedy';
  Carp::confess "sub_repeat: min cant be greater than max" if $min > $max;
  my $stem = 'cached_sub_repeat_';
  my $key =  'cached_sub_repeat_'.$ng;
  if(not exists $o->{$key}) {
    my $gen_code = sub {
      my($first,$second)=@_;
      return '
#line 2 "cached_sub_repeat"
      sub {
        my($o,$f,$min,$max)=@_;
        '.$o->code_subwrap("'$key'",'sub{
          my $c = $_[0];
          my $pos_old = -1;
          my $i = 0;
          my($fmin,$fagain,$frest);
          $fmin = sub{
            if($i >= $min) {
              goto &$fagain;
            }
            $i++; '.$o->code_tailcall('$f','$fmin').'
          };
          $fagain = sub{
            if($pos_old >= '.$o->config_pos_var.'){
              '.$o->code_tailcall('$c').';
            }
            $pos_old = '.$o->config_pos_var.';
            goto &$frest;
          };
          $frest = sub{
            if($i >= $max) {
              '.$o->code_tailcall('$c').';
            }
            $i++;
            my $v = '.$o->source_let_vars($o->config_backtrack_vars,
                                          $o->code_call(@$first)).';
            return $v if '.$o->code_fail_isnot('$v').';
            '.$o->code_tailcall(@$second).';
          };
          goto &$fmin;
        }').'
      }';
    };
    my $recurse = ['$f','$fagain'];
    my $continue= ['$c'];
    my $code_g  = $gen_code->($recurse,$continue);
    my $code_ng = $gen_code->($continue,$recurse);
    $o->{$stem.'greedy'}    = $o->_eval_code($code_g);
    $o->{$stem.'nongreedy'} = $o->_eval_code($code_ng);
  }
  $o->{$key}($o,$f,$min,$max,$ng);
}




#package Regexp::Engine::Fribble::MakeUtilMixin;


package Regexp::Engine::Fribble::MakeStringSearch;
use base 'Regexp::Engine::Fribble::Make';

sub new {
  my $o = shift->SUPER::new(@_);
#  $o->config_backtrack_vars([$o->config_pos_var]);
}
sub config_str_var {
  my($o,$val)=@_;
  $o->config_set('str_var',$val) if defined $val;
  $o->{config_str_var};
}
sub config_pos_var {
  my($o,$val)=@_;
  $o->config_set('pos_var',$val) if defined $val;
  $o->{config_pos_var};
}
sub config_match_var {
  my($o,$val)=@_;
  $o->config_set('match_var',$val) if defined $val;
  $o->{config_match_var};
}
sub config_cap_var {
  my($o,$val)=@_;
  $o->config_set('cap_var',$val) if defined $val;
  $o->{config_cap_var};
}
sub config_flag_var {
  my($o,$val)=@_;
  $o->config_set('flag_var',$val) if defined $val;
  $o->{config_flag_var};
}

sub sub_eat_regexp {
  my($o,$re)=@_;
  Carp::confess if !defined($re);
  my $str = $o->config_str_var;
  my $pos = $o->config_pos_var;
  #print "Making eater for /$re/.\n";
  my $code = "
#line 2 \"sub_eat_regexp\"
  ".$o->code_subwrap("'sub_eat_regexp'","sub {
    my \$c = \$_[0];
    pos($str) = $pos;
    $str =~ /\\G($re)/ or ".$o->code_fail.";
    $pos += length(\$1);
    ".$o->code_tailcall('$c')."
  }");
#    print \"Trying to match /$re/ against '$str' pos $pos.\\n\"; # bad if $re contains \n
  $o->_eval_code($code);
}

sub sub_capture_variant1 {
  my($o,$idx,$f)=@_;
  my $key = 'cached_capture_variant1';
  if(not exists $o->{$key}) {
    my $pos = $o->config_pos_var;
    my $str = $o->config_str_var;
    my $cap = $o->config_cap_var;
    my $code = '
#line 2 "sub_capture_variant1"
    sub {
      my($o,$idx,$f)=@_;
      sub {
        my $c = $_[0];
        my $m = MatchOne->new();
        my $from = '.$pos.';
        my $close = sub {
          my $c0 = $_[0];
          my $to = '.$pos.';
          $m->match_set(1,substr('.$str.',$from,$to-$from),[],{},$from,$to);
          '.$o->code_tailcall('$c0','$c').'
        };
        return '.$o->source_let_vars([@{$o->config_backtrack_vars},$cap],
             $cap.' = [@{'.$cap.'}];
           '.$cap.'->[$idx] = $m;
             my $v = $f->($close);
             $m->match_set_as_failed if '.$o->code_fail_is('$v').';
             $v;').'
      }
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key}($o,$idx,$f);
}

sub sub_match {
  my($o)=@_;
  my $key = 'cached_match';
  if(not exists $o->{$key}) {
    my $pos = $o->config_pos_var;
    my $str = $o->config_str_var;
    my $cap = $o->config_cap_var;
    my $mat = $o->config_match_var;
    my $code = '
    sub {
      my($r,$s)=@_;
      my $len = length($s);
      for my $start (0..$len) {
        local '.$str.' = $s;
        local '.$pos.' = $start;
        local '.$cap.' = [];
        my $m = MatchOne->new();
        local '.$mat.' = $m;
        my $ok = $r->($noop);
        if('.$o->code_fail_isnot('$ok').') {
          my $a = '.$cap.';
          $m->match_set(1,substr('.$str.',$start,'.$pos.'-$start),
                  $a,\%{$m},$start,'.$pos.');
          return $m;
        }
      }
      return MatchOne->new()->match_set_as_failed;
    }';
    $o->{$key} = $o->_eval_code($code);
  }
  $o->{$key};
}

1;
__END__