package Text::Scigen::scigen;
# This file is part of SCIgen.
#
# SCIgen is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SCIgen is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SCIgen; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
use strict;
use vars qw($SCIGEND_PORT);
#### daemon settings ####
$SCIGEND_PORT = 4724;
sub dup_name {
my $name = shift;
return $name . "!!!";
}
sub file_name {
my $name = shift;
return $name . ".file";
}
sub read_rules {
my ($fh, $rules, $RE, $debug, $self) = @_;
my $line;
while ($line = <$fh>) {
next if $line =~ /^#/ ;
next if $line !~ /\S/ ;
my @words = split /\s+/, $line;
my $name = shift @words;
my $rule = "";
# non-duplicate rule
if( $name =~ /([^\+]*)\!$/ ) {
$name = $1;
push @{$rules->{dup_name("$name")}}, "";
next;
}
# include rule
if( $name =~ /\.include$/ ) {
my $file = $words[0];
# make sure we haven't already included this file
# NOTE: this allows the main file to be included at most twice
if( defined $rules->{&file_name($file)} ) {
if( $debug > 0 ) {
print "Skipping duplicate included file $file\n";
}
next;
} else {
$rules->{&file_name($file)} = 1;
}
if( $debug > 0 ) {
print "Opening included file $file\n";
}
my $path = $self->_find( $file );
open(my $inc_fh, "<:utf8", $path)
or die( "Couldn't open included file $path" );
&read_rules( $inc_fh, $rules, undef, $debug );
next; # we don't want to have .include itself be a rule
}
if ($#words == 0 && $words[0] eq '{') {
my $end = 0;
while ($line = <$fh>) {
if ($line =~ /^}[\r\n]+$/) {
$end = 1;
last;
} else {
$rule .= $line;
}
}
if (! $end) {
die "$name: EOF found before close rule\n";
}
} else {
$line =~ s/^\S+\s+//;
chomp ($line);
$rule = $line;
}
# look for the weight
my $weight = 1;
if( $name =~ /([^\+]*)\+(\d+)$/ ) {
$name = $1;
$weight = $2;
if( $debug > 10 ) {
warn "weighting rule by $weight: $name -> $rule\n";
}
}
do {
push @{$rules->{$name}}, $rule;
} while( --$weight > 0 );
}
if( defined $RE ) {
compute_re( $rules, $RE );
}
}
sub compute_re {
# must sort; order matters, and we want to make sure that we get
# the longest matches first
my ($rules, $RE) = @_;
my $in = join "|", sort { length ($b) <=> length ($a) } keys %$rules;
$$RE = qr/^(.*?)(${in})/s ;
}
sub generate {
my ($rules, $start, $RE, $debug, $pretty) = @_;
my $s = expand ($rules, $start, $RE, $debug);
if( $pretty ) {
$s = pretty_print($s);
}
return $s;
}
sub pick_rand {
my ($set) = @_;
my $n = $#$set + 1;
my $v = @$set[int (rand () * $n)];
return $v;
}
sub pop_first_rule {
my ($rules, $preamble, $input, $rule, $RE) = @_;
$$preamble = undef;
$$rule = undef;
my $ret = undef;
if ($$input =~ s/$RE//s ) {
$$preamble = $1;
$$rule = $2;
return 1;
}
return 0;
}
sub pretty_print {
my ($s) = shift;
my $news = "";
my @lines = split( /\n/, $s );
foreach my $line (@lines) {
my $newline = "";
$line =~ s/(\s+)([\.\,\?\;\:])/$2/g;
$line =~ s/(\b)(a)\s+([aeiou])/$1$2n $3/gi;
if( $line =~ /\\section(\*?){(.*)}/ ) {
$newline = "\\section${1}{" .
Text::Autoformat::autoformat( $2, { case => 'highlight',
squeeze => 0 } );
chomp $newline;
chomp $newline;
$newline .= "}";
} elsif( $line =~ /(\\subsection){(.*)}/ or
$line =~ /(\\slideheading){(.*)}/ ) {
$newline = $1 . "{" .
Text::Autoformat::autoformat( $2, { case => 'highlight',
squeeze => 0 } );
chomp $newline;
chomp $newline;
$newline .= "}";
} elsif( $line =~ /\\title{(.*)}/ ) {
$newline = "\\title{" .
Text::Autoformat::autoformat( $1, { case => 'highlight',
squeeze => 0 } );
chomp $newline;
chomp $newline;
$newline .= "}";
} elsif( $line =~ /(.*) = {(.*)}\,/ ) {
my $label = $1;
my $curr = $2;
# place brackets around any words containing capital letters
$curr =~ s/\b([^\s]*[A-Z]+[^\s\:]*)\b/\{$1\}/g;
$newline = "$label = {" .
Text::Autoformat::autoformat( $curr, { case => 'highlight',
squeeze => 0 } );
chomp $newline;
chomp $newline;
$newline .= "},";
} elsif( $line =~ /\S/ ) {
$newline =
Text::Autoformat::autoformat( $line, { case => 'sentence',
squeeze => 0,
break => break_latex(),
ignore => qr/^\\/ } );
}
$newline =~ s/\\Em/\\em/g;
if( $newline !~ /\n$/ ) {
$newline .= "\n";
}
$news .= $newline;
}
return $news;
}
sub break_latex($$$) {
my ($text, $reqlen, $fldlen) = @_;
if( !defined $text ) {
$text = "";
}
return { $text, "" };
}
sub expand {
my ($rules, $start, $RE, $debug) = @_;
# check for special rules ending in + and #
# Rules ending in + generate a sequential integer
# The same rule ending in # chooses a random # from among preiously
# generated integers
if( $start =~ /(.*)\+$/ ) {
my $rule = $1;
my $i = $rules->{$rule};
if( !defined $i ) {
$i = 0;
$rules->{$rule} = 1;
} else {
$rules->{$rule} = $i+1;
}
return $i;
} elsif( $start =~ /(.*)\#$/ ) {
my $rule = $1;
my $i = $rules->{$rule};
if( !defined $i ) {
$i = 0;
} else {
$i = int rand $i;
}
return $i;
}
my $full_token;
my $repeat = 0;
my $count = 0;
do {
my $input = pick_rand ($rules->{$start});
$count++;
if ($debug >= 5) {
warn "$start -> $input\n";
}
my ($pre, $rule);
my @components;
$repeat = 0;
while (pop_first_rule ($rules, \$pre, \$input, \$rule, $RE)) {
my $ex = expand ($rules, $rule, $RE, $debug);
push @components, $pre if length ($pre);
push @components, $ex if length ($ex);
}
push @components, $input if length ($input);
$full_token = join "", @components ;
my $ref = $rules->{dup_name("$start")};
if( defined $ref ) {
my @dups = @{$ref};
# make sure we haven't generated this exact token yet
foreach my $d (@dups) {
if( $d eq $full_token ) {
$repeat = 1;
}
}
if( !$repeat ) {
push @{$rules->{dup_name("$start")}}, $full_token;
} elsif( $count > 50 ) {
$repeat = 0;
}
}
} while( $repeat );
return $full_token;
}
1;