The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

use Gfsm;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case);
use File::Basename qw(basename);

##======================================================================
## Defaults

our $prog    = basename($0);
our $VERSION = 0.01;

our ($help,$version);

##-- options: I/O
our $outfile = '-';
our $zlevel  = -1;

##-- options: topology
our $acceptor = 0;
our $epsilon  = 1;
our $randomize_lo = 0;

our $n_states = 8; ##-- advisory only
our $n_labels = 2; ##-- including epsilon, if specified

our $w_min = 0;
our $w_max = 0;

our $d_min = 0;
our $d_max = 8;

our $n_xarcs  = 0; ##-- number of cross-arcs (non-cyclic)
our $n_carcs  = 0; ##-- number of cyclic arcs
our $n_uarcs  = 0; ##-- number of random arcs (cyclic or non-cyclic)
our $cl_min    = 1;     ##-- minimum cycle length (including cyclic arc; should be >= 1)
our $cl_max    = undef; ##-- maximum cycle length (default=$d_max+1)

our $seed = undef;
our $sort = 'none'; ##-- state-sort mode (pre -xarcs)

##======================================================================
## Command-Line
GetOptions(##-- General
	   'help|h'    => \$help,
	   'version|V' => \$version,

	   ##-- Topology
	   'seed|srand|r=i'  => \$seed,
	   'acceptor|fsa|A!' => \$acceptor,
	   'transducer|fst|T!' => sub { $acceptor=!$_[1]; },
	   'randomize-inputs|randomize-lower|ri|rl|i!' => \$randomize_lo,

	   'epsilon|eps|e!' => \$epsilon,

	   'n-labels|labels|l=i' => \$n_labels,
	   'n-states|states|q=i' => \$n_states,

	   'min-weight|wmin|w=f' => \$w_min,
	   'max-weight|wmax|W=f' => \$w_max,

	   'min-depth|dmin|d=i' => \$d_min,
	   'max-depth|dmax|D=i' => \$d_max,

	   'breadth-first-sort|bfs' => sub { $sort='bfs' },
	   'depth-first-sort|dfs'   => sub { $sort='dfs' },
	   'no-sort|nosort|ns' => sub { $sort='none' },

	   'n-xarcs|xarcs|xa|x=i' => \$n_xarcs,
	   'n-carcs|carcs|ca|c|n-cycles|ncycles=i' => \$n_carcs,
	   'n-uarcs|uarcs|ua|u|a=i' => \$n_uarcs,
	   'min-cycle-length|clmin|y=i'  => \$cl_min,
	   'max-cycle-length|clmax|Y=i'  => \$cl_max,

	   ##-- I/O
	   'output|o|F=s' => \$outfile,
	   'compress|z=i' => \$zlevel,
	  );

pod2usage({-exitval=>0, -verbose=>0}) if ($help);

if ($version) {

  print STDERR
    ("${prog} v$VERSION by Bryan Jurish <moocow\@cpan.org>\n",
    );
  exit(0);
}

##======================================================================
## Subs

##--------------------------------------------------------------
## $n = max_trie_states($n_labels, $depth_max)
sub max_trie_states {
  my ($na,$dmax) = @_;
  ##-- loop form
  ## : $n_labels==2 --> 0:1 1:3 2:7 3:15 4:31 5:63 6:127 7:255 8:511 9:1023 10:2047
  ## : $n_labels==3 --> 0:1 1:4 2:13 3:40 4:121 5:364 6:1093 7:3280 8:9841 9:29524 10:88573
  ## : $n_labels==4 --> 0:1 1:5 2:21 3:85 4:341 5:1365 6:5461 7:21845 8:87381 9:349525 10:1398101
  if (0) {
    my $nq = 0;
    foreach (0..$dmax) {
      $nq += $na**$_;
    }
    return $nq;
  }
  ##-- closed form; see e.g.:
  ##     http://mathworld.wolfram.com/Repunit.html
  ##     http://oeis.org/A000225 ($n_labels==2)
  ##     http://oeis.org/A003462 ($n_labels==3)
  ##     http://oeis.org/A002450 ($n_labels==4)
  ##     http://mathworld.wolfram.com/MersenneNumber.html ($n_labels==2)
  return ($na**($dmax+1)-1)/($na-1);
}

##======================================================================
## Main


##--------------------------------------------------------------
our ($fsm,$l_min,$q);
sub init_fsm {
  srand($seed) if (defined($seed));

  ##-- sanity checks
  my $nq_max = max_trie_states($n_labels,$d_max);
  if ($n_states > $nq_max) {
    warn("$prog: cannot generate fsm with n_states > M_{d_max+1}^(n_labels) = (n_labels^(d_max+1)-1)/(n_labels-1); setting -n-states=$nq_max\n");
    $n_states = $nq_max;
  }

  $fsm = Gfsm::Automaton->newTrie();
  $fsm->is_transducer(!$acceptor);
  $fsm->is_weighted(1);
  $fsm->semiring_type($Gfsm::SRTTropical);

  $l_min = $epsilon ? 0 : 1;
}
init_fsm();

##--------------------------------------------------------------
our $finals = {}; ##-- $qid => undef
our ($nq);
sub gen_spine {
  ##-- generate base trie
  my (@lo,@hi,$len,$q);
  while ($fsm->n_states() < $n_states) {
    $len = $d_min+int(rand(1+$d_max-$d_min));
    @lo = map {$l_min+int(rand($n_labels))} (1..$len);
    $q = $fsm->add_path(\@lo, \@hi, 0);
    $finals->{$q} = undef;
  }

  ##-- mark unsorted (avoid "smart" arc insertion)
  $fsm->sort_mode(Gfsm::ASMNone());
  $nq = $fsm->n_states();

  ##-- maybe sort
  $sort = '' if (!defined($sort));
  if ($sort eq 'bfs') {
    $fsm->statesort_bfs();
  } elsif ($sort eq 'dfs') {
    $fsm->statesort_dfs();
  }
}
gen_spine();


##--------------------------------------------------------------
## \@qpaths = qpaths($fsm);  ##-- scalar context
## (\@qpaths,$maxdepth) = qpaths($fsm); ##-- list context
##  + s.t. $qpaths->[$q] = pack('L*', $q0,$q1,...,$q)
##  + gets state "addresses"; used to generate guaranteed cyclic arcs

#*qpaths = \&qpaths_v0;
sub qpaths_v0 {
  ##-- init qpaths()
  my $fsm = shift;
  my $qpaths = [''];
  my $rfsm = $fsm->reverse();
  my $ai = Gfsm::ArcIter->new();
  my ($ri,@p,$q);
  my $dmax = 0;
  foreach $q (keys %$finals) {
    @p = ($q);
    for ($ai->open($rfsm,$q); $q != 0; $ai->open($rfsm,$q)) {
      unshift(@p,($q=$ai->target));
    }
    $dmax = $#p if ($#p >= $dmax);
    foreach $ri (1..$#p) {
      $q = $p[$ri];
      next if (defined($qpaths->[$q]));
      $qpaths->[$q] = pack('L*', @p[0..$ri]);
    }
  }
  return wantarray ? ($qpaths,$dmax) : $qpaths;
}

*qpaths = \&qpaths_v1;
sub qpaths_v1 {
  ##-- init qpaths(), using arcpaths() function
  my $fsm = shift;
  my $qpaths = [''];
  my $apaths = $fsm->arcpaths();
  my ($ap,@qp,$qi);
  my $dmax = 0;
  my $arc_ignore_size = $Gfsm::arc_size - length(pack('LL',0,0));
  foreach $ap (@$apaths) {
    #@qp = (0,map {$_->[1]} Gfsm::unpack_arcpath($ap)); ##-- [src,dst,lo,hi,w],...
    @qp = (0,unpack("(x4Lx${arc_ignore_size})*", $ap));
    pop(@qp);
    $dmax = $#qp if ($#qp >= $dmax);
    foreach $qi (1..$#qp) {
      next if (defined($qpaths->[$qp[$qi]]));
      $qpaths->[$qp[$qi]] = pack('L*', @qp[0..$qi]);
    }
  }
  return wantarray ? ($qpaths,$dmax) : $qpaths;
}


##--------------------------------------------------------------
##-- introduce cycles
sub gen_cycles {
  return if ($n_carcs <= 0);
  $cl_min = 1        if ($cl_min <= 0);
  $cl_max = $d_max+1 if (!defined($cl_max) || $cl_max<=0);
  my ($qpaths,$dmax) = qpaths($fsm);
  if ($cl_min >= ($dmax+1)) {
    warn("$prog: requested minimum cycle length $cl_min too large for spine with depth $dmax, using -clmin=".($dmax+1));
    $cl_min = $dmax+1;
  }
  my ($nc, $q,@qp,$qpi_max,$qpi_min, $r,$a);
  for ($nc=0; $nc<$n_carcs; ) {
    $q  = int(rand($nq));
    @qp = unpack('L*',$qpaths->[$q]);
    $qpi_max = @qp-$cl_min;                       ##-- potential cycle-target states r with len(r-*->q)+1 >= min_cycle_len
    $qpi_min = @qp>$cl_max ? (@qp-$cl_max) : 0;   ##-- potential cycle-target states r with len(r-*->q)+1 <= max_cycle_len
    next if ($qpi_min > $qpi_max);                ##-- potential infloop!
    $r = $qp[$qpi_min+int(rand(1+$qpi_max-$qpi_min))];
    $a = $l_min+int(rand($n_labels));
    $fsm->add_arc($q,$r, $a,$a,0);
    ++$nc;
  }
}
gen_cycles();

##--------------------------------------------------------------
##-- add non-cyclic arcs
sub gen_xarcs {
  for ($i=0; $i<$n_xarcs; ++$i) {
    $q = int(rand($nq-1));
    $r = 1 + $q + int(rand($nq-$q-1));
    $a = $l_min+int(rand($n_labels));
    $fsm->add_arc($q,$r, $a,$a, 0);
  }
}
gen_xarcs();

##--------------------------------------------------------------
##-- add arbitrary arcs
sub gen_uarcs {
  for ($i=0; $i<$n_uarcs; ++$i) {
    $q = int(rand($nq));
    $r = int(rand($nq));
    $a = $l_min+int(rand($n_labels));
    $fsm->add_arc($q,$r, $a,$a, 0);
  }
}
gen_uarcs();

##--------------------------------------------------------------
##-- generate upper arc labels
sub gen_upper_labels {
  return if ($acceptor);
  my $ai = Gfsm::ArcIter->new();
  for ($q=0; $q < $fsm->n_states(); ++$q) {
    for ($ai->open($fsm,$q); $ai->ok(); $ai->next()) {
      $ai->upper($l_min+int(rand($n_labels)));
    }
  }
}
gen_upper_labels();

##--------------------------------------------------------------
##-- generate weights
sub gen_weights {
  ($w_min,$w_max) = sort ($w_min,$w_max);
  my $w_rng = $w_max-$w_min;
  return if ($w_min==$w_max && $w_max==0);

  my $ai = Gfsm::ArcIter->new();
  for ($q=0; $q < $fsm->n_states(); ++$q) {
    $fsm->final_weight($q,$w_min+($w_rng>0 ? rand($w_rng) : 0)) if ($fsm->is_final($q));
    for ($ai->open($fsm,$q); $ai->ok(); $ai->next()) {
      $ai->weight($w_min+($w_rng>0 ? rand($w_rng) : 0));
      $ai->upper($l_min+int(rand($n_labels))) if (!$acceptor);
    }
  }
}
gen_weights();



##--------------------------------------------------------------
##-- randomize lower arc labels
sub randomize_lower_labels {
  return if (!$randomize_lo);
  my $ai = Gfsm::ArcIter->new();
  for ($q=0; $q < $fsm->n_states(); ++$q) {
    for ($ai->open($fsm,$q); $ai->ok(); $ai->next()) {
      $ai->lower($l_min+int(rand($n_labels)));
    }
  }
}
randomize_lower_labels();

#$fsm->renumber_states();
#$fsm->statesort_bfs();
#$fsm->statesort_dfs();

##-- dump
$fsm->save($outfile,$zlevel)
  or die("$prog: save failed to gfsm file '$outfile': $!");


__END__

##======================================================================
## Pods
=pod

=pod

=head1 NAME

gfsm-random-trie.perl - create a random trie-based FSM

=head1 SYNOPSIS

 gfsm-random-trie.perl [OPTIONS]

 General Options:
  -help
  -version

 Topology Options:
  -seed SEED                # random seed (default: none)
  -acceptor , -transducer   # build FSA or FST (default=-transducer)
  -epsilon  , -noepsilon    # do/don't include epsilon (zero) labels (default=-epsilon)
  -randomize-lower          # randomize lower (input) labels (default: don't)
  -n-labels=NA              # alphabet size (default=2)
  -n-states=NQ              # target number of states (default=8; output is s.t. NQ <= |Q| < NQ+DMAX)
  -min-weight=W             # minimum weight (default=0)
  -max-weight=W             # maximum weight (default=0)
  -min-depth=DMIN           # minimum spine path length (default=0)
  -max-depth=DMAX           # maximum spine path length (default=8)
  -bfs , -dfs , -nosort     # state-sort to apply before adding arcs (default:-nosort)
  -n-xarcs=N                # number of guaranteed acyclic arcs added to spine (default=0)
  -n-carcs=N                # number of guaranteed  cyclic arcs added to spine (default=0)
  -n-uarcs=N                # number of unrestricted random arcs added to spine (default=0)
  -min-cycle-length=YMIN    # minimum cycle length for guaranteed cyclic arcs (default=0)
  -max-cycle-length=YMAX    # maximum cycle length for guaranteed cyclic arcs (default=DMAX)

 I/O Options:
  -compress=ZLEVEL          # zlib compression level
  -output=GFSMFILE          # output automaton

=cut

##==============================================================================
## Description
##==============================================================================
=pod

=head1 DESCRIPTION

Not yet written.

=cut

##======================================================================
## Footer
##======================================================================

=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.org<gt>


=head1 COPYRIGHT AND LICENSE

Copyright (C) 2012 by Bryan Jurish

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.

=head1 SEE ALSO

perl(1),
Gfsm(3perl)

=cut