The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package translate;
use t::util;
use t::utilBabel;
use Carp;
use Getopt::Long;
use Hash::AutoHash;
use List::MoreUtils qw(uniq);
use List::Util qw(min);
use Math::BaseCalc;
use POSIX qw(ceil);
use Set::Scalar;
use Test::More;
use Text::Abbrev;
use Class::AutoDB;
use Data::Babel;
use strict;
our @ISA=qw(Exporter);

our @EXPORT=qw($OPTIONS %OPTIONS @OPTIONS $OP $autodb $babel $dbh
	       @filter_subsets @output_subsets 
	       maptable_data master_data idtype2ids
               init doit make_filter);
our($OPTIONS,%OPTIONS,@OPTIONS,$OP,$autodb,$babel,$dbh,@filter_subsets,@output_subsets);

@OPTIONS=qw(op=s history validate
		user_type=s db_type=s graph_type=s link_type=s basecalc=i
		max_filters=i max_outputs=i num_maptables=i arity=i);
our %op=abbrev qw(translate count);
our %user_type=abbrev qw(installer developer);
our %db_type=abbrev qw(binary staggered basecalc);
our %graph_type=abbrev qw(star chain tree);
our %link_type=abbrev qw(starlike chainlike);

# for some options, defaults depend on graph_type x user_type
our %DEFAULTS=
  (op=>'translate',
   user_type=>'installer',
   db_type=>'binary',
   graph_type=>'star',
   basecalc=>4,
   "star$;installer"=>{max_outputs=>2,max_filters=>2,db_type=>'basecalc',
		       link_type=>'starlike',arity=>4,num_maptables=>4},
   
   "star$;developer"=>{max_outputs=>2,max_filters=>3,db_type=>'basecalc',
		       link_type=>'starlike',arity=>4,num_maptables=>4},

   "chain$;installer"=>{max_outputs=>2,max_filters=>2,db_type=>'binary',
			link_type=>'chainlike',arity=>1,num_maptables=>4},

   "chain$;developer"=>{max_outputs=>2,max_filters=>3,db_type=>'binary',
			link_type=>'chainlike',arity=>1,num_maptables=>4},

   "tree$;installer"=>{max_outputs=>1,max_filters=>1,db_type=>'staggered',
		       link_type=>'starlike',arity=>2,num_maptables=>5},
   
   "tree$;developer"=>{max_outputs=>2,max_filters=>2,db_type=>'staggered',
		       link_type=>'starlike',arity=>2,num_maptables=>7},
  );

sub init {
  my $setup=shift @_;
  $OPTIONS=get_options();
  unless ($setup) {
    $autodb=new Class::AutoDB(database=>'test'); 
    isa_ok($autodb,'Class::AutoDB','sanity test - $autodb');
    # expect 'old' to return the babel
    $babel=old Data::Babel(name=>'test',autodb=>$autodb);
    isa_ok($babel,'Data::Babel','sanity test - old Babel returned Babel object');
    my @idtypes=@{$babel->idtypes};
    my @maptables=@{$babel->maptables};
    is(scalar @maptables,$OPTIONS->num_maptables,
       'sanity test - old Babel has expected number of maptables');
    my $power_set=Set::Scalar->new(@idtypes)->power_set;
    @filter_subsets=grep {$_->size<=$OPTIONS->max_filters} $power_set->members;
    @output_subsets=grep {$_->size<=$OPTIONS->max_outputs} $power_set->members;
  } else {			# setup new database
    $autodb=new Class::AutoDB(database=>'test',create=>1); 
    isa_ok($autodb,'Class::AutoDB','sanity test - $autodb');
    cleanup_db($autodb);		# cleanup database from previous test
    Data::Babel->autodb($autodb);
    # rest of setup done by test
  }
  $dbh=$autodb->dbh;
}
# returns Hash::AutoHash
sub get_options {
  GetOptions(\%OPTIONS,@OPTIONS);
  # set defaults that don't depend on graph_type x user_type
  map {$OPTIONS{$_}=$DEFAULTS{$_} unless defined $OPTIONS{$_}} 
    qw(op user_type graph_type basecalc);
  # expand abbreviations
  for my $option (qw(op user_type db_type graph_type link_type)) {
    next unless defined $OPTIONS{$option};
    my %abbrev=eval "\%$option";
    $OPTIONS{$option}=$abbrev{$OPTIONS{$option}} or confess "illegal value for option $option";
  }
  # set defaults that depend on graph_type x user_type
  my %defaults=%{$DEFAULTS{"$OPTIONS{graph_type}$;$OPTIONS{user_type}"}};
  map {$OPTIONS{$_}=$defaults{$_} unless exists $OPTIONS{$_}} keys %defaults;
  
  # set special-case defaults
  # $OPTIONS{filter}=1 if !defined($OPTIONS{filter}) && scriptbasename=~/filter/;

  $OP=$OPTIONS{op};
  $OPTIONS=new Hash::AutoHash %OPTIONS;
}

# args are idtypes
sub doit {
  my($input_idtype,$input_ids,$filters,$output_idtypes,$file,$line)=@_;
  $filters={} unless defined $filters;
  my $ok=1;
  # get idtype names for use in label
  my $input_name=$input_idtype->name;
  my @output_names=map {$_->name} @$output_idtypes;
  my @filter_names=keys %$filters;
  
  my(@args,$label);
  if ($input_ids ne 'all') {
    @args=(input_idtype=>$input_idtype,input_ids=>$input_ids,filters=>$filters,
	   output_idtypes=>$output_idtypes);
    $label=$OPTIONS->db_type.": input=$input_name, num input_ids=".
      (defined($input_ids)? scalar(@$input_ids): 0).
	" filters=@filter_names, outputs=@output_names";
  } else {
    @args=(input_idtype=>$input_idtype,input_ids_all=>1,filters=>$filters,
	   output_idtypes=>$output_idtypes);
    $label=$OPTIONS->db_type.": input=$input_name, input_ids_all=1, filters=@filter_names, outputs=@output_names";
  }
  push(@args,validate=>1) if $OPTIONS->validate;
  my $correct=select_ur(babel=>$babel,@args);
  my $actual=$babel->$OP(@args);
  $ok&&=cmp_op_quietly($actual,$correct,$OP,"$OP $label",$file,$line);
  $ok;
}

########################################
# these functions generate data loaded into database or used in queries
########################################
# arg is maptable number
sub maptable_data {
  my($i)=@_;
  my $maptable=$babel->name2maptable('maptable_'.sprintf('%03d',$i));
  my @idtype_names=map {$_->name} @{$maptable->idtypes};
  my @data;
  unless ($OPTIONS->db_type eq 'basecalc') {
    my @series=data_series($i);	# make data series for $OPTIONS->db_type
    # for each value in series, create a row
    for my $val (@series) {
      push(@data,[map {"$_/$val"} @idtype_names]);
    }
  } else { # all strings of length @idtype_names digits over base $basecalc
    my $calc=new Math::BaseCalc(digits=>[0..$OPTIONS->basecalc-1]);
    my $numdigits=@idtype_names;
    for (my $i=0; $i<$OPTIONS->basecalc**$numdigits; $i++) {
      my @digits=split('',sprintf("%0.*i",$numdigits,$calc->to_base($i)));
      push(@data,[map {"$idtype_names[$_]/d_$digits[$_]"} 0..$numdigits-1]);
    }
  }
  # add in 'multi' rows: links are 'multi','multi'; leafs are 'multi_000','multi_001']
  push(@data,[map {/^leaf/? "$_/multi_000": "$_/multi"} @idtype_names]);
  push(@data,[map {/^leaf/? "$_/multi_001": "$_/multi"} @idtype_names]);
  \@data;
}
# arg is Master object
sub master_data {
  # my $name=ref $_[0]? $_[0]->idtype->name: 'leaf_'.sprintf('%03d',$_[0]);
  confess "obsolete call to master_data. arg must be Master object, not $_[0]" unless ref $_[0];
  my $name=$_[0]->idtype->name;
  my @series=data_series();	# make data series for $OPTIONS->db_type
  # my @series=$OPTIONS->db_type eq 'staggered'? staggered_series(): binary_series();
  # my @extras=(qw(none_000 none_001),$name=~/^leaf/? qw(multi_000 multi_001): qw(multi));
  my @multis=$name=~/^leaf/? qw(multi_000 multi_001): qw(multi);
  my @nones=qw(none_000 none_001);
  my @data=!$OPTIONS->history? (map {"${name}/$_"} (@series,@multis,@nones)):
    ((map {["_x_${name}/$_","${name}/$_"]} (@series,@multis)),
     map {["_x_${name}/$_",'NULL']} @nones);
  # wantarray? @data: \@data;
  \@data;
}
# generate input ids for IN clause. many don't match anything.
# NG 12-11-21: add histories
# arg is IdType
sub idtype2ids {
  my($idtype)=@_;
  # master_data($idtype);
  my $name=$idtype->name;
  my $prefix=!$OPTIONS->history? $name: "_x_$name";
  my @series=data_series();	# make data series for $OPTIONS->db_type
  my @extras=(qw(none_000 none_001),$name=~/^leaf/? qw(multi_000 multi_001): qw(multi));
  my @data=map {"${prefix}/$_"} (@series,@extras);
  \@data;
}

# generate series of raw values for use in maptables, masters, and IN clauses
sub data_series {
  my($i)=@_;
  eval $OPTIONS->db_type.'_series($i)';
}
sub binary_series {
  my($i)=@_;
  my @series=_binary_series($OPTIONS->num_maptables,$i);
  map {"a_$_"} @series;
}
sub staggered_series {
  my($i)=@_;
  my $last_maptable=$OPTIONS->num_maptables-1;
  defined $i?
    ((map {'b_'.sprintf('%03d',$_)} ($i..$last_maptable)),
     (map {'c_'.sprintf('%03d',$last_maptable-$_)} (0..$i))):
       (map {('b_'.sprintf('%03d',$_),'c_'.sprintf('%03d',$_))} (0..$last_maptable));
}
sub basecalc_series {
  map {"d_$_"} 0..$OPTIONS->basecalc-1;
}
sub _binary_series {
  my($bits,$my_bit)=@_;
  if (defined $my_bit) {	# return $bits-wide numbers with $my_bit set
    my $mask=1<<$my_bit;
    return map {sprintf '%0*b',$bits,$_} grep {$_&$mask} (0..2**$bits-1);
  } else {			# return all $bits-wide numbers
    return map {sprintf '%0*b',$bits,$_} (0..2**$bits-1);
  }
}

# for debugging. args are number of bits, and number to convert
sub as_binary_string {sprintf '%0*b',@_}

########################################
# these functions used by filter tests to get filter ids that generate
#   results of desired size
########################################
# make filters HASH. 
#   $filters arg is ARRAY of filter_idtypes
#   $fraction is target fraction of table cut by each filter 
#   if $multi_ok is true, okay to include 'multi' ids
#     $fraction, $multi_ok not used when db_type is basecalc
sub make_filter {
  my($input,$input_ids,$filters,$outputs,$multi_ok,$fraction)=@_;
  $input_ids=undef if $input_ids eq 'all';
  my $table=select_ur(babel=>$babel,input_idtype=>$input,input_ids=>$input_ids,
		      output_idtypes=>[@$filters,@$outputs]);
  my $filter={};
  for(my $i=0; $i<@$filters; $i++) {
    my $filter_ids=
      $OPTIONS->db_type ne 'basecalc' ?
	choose_filter_ids($table,$i+1,$multi_ok,$fraction):
	  # in basecalc db, each digit selects approx 1/basecalc rows
	  [$filters->[$i]->name.'/d_0']; # any digit would do
    @$filter_ids=map {"_x_$_"} @$filter_ids if $filters->[$i]->history;
    $filter->{$filters->[$i]->name}=$filter_ids;
    $table=grep_table($table,$i+1,$filter_ids);
  }
  $filter;
}

# choose ids from column $col of $table that approximately cut the table to $fraction
# if $multi_ok is true, okay to include 'multi' ids
# if all ids are NULL, use undef - will match NULLS
sub choose_filter_ids {
  my($table,$col,$multi_ok,$fraction)=@_;
  $fraction=0.5 unless defined $fraction;
  my $nrows=ceil($fraction*scalar(@$table));
  my @all_ids=grep {defined $_} map {$_->[$col]} @$table;
  @all_ids=grep !/multi/,@all_ids unless $multi_ok;
  @all_ids? [uniq(@all_ids[0..min($#all_ids,$nrows-1)])]: [undef];
}
sub grep_table {
  my($table,$col,$ids)=@_;
  my $pattern=join('|',map {"\^$_\$"} @$ids);
  $pattern=qr/$pattern/;
  [grep {$_->[$col]=~/$pattern/} @$table];
}