The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
######################################################
# history.pl
######################################################
# Author: liangc 
# $Id: history.pl,v 1.8 2009/08/13 20:42:58 astoltzfus Exp $

# create ancestral intron states based on an alignment and tree
# in a nexus file. The BUGS program will run many times with small
# number of generations to find those can converge (to global optimum)
# and then run one with a large number of generations (convergence)
# the ancestral states will be computed from the average value of 
# those generations and write back to file in history block

use strict;
use Pod::Usage;
use Data::Dumper;
use Getopt::Long;
use Bio::NEXUS;

my $version = "\$Revision\$";

Getopt::Long::Configure("bundling"); # for short options bundling
my %opts = ();
GetOptions(\%opts, 'help|h', 'man', 'v', 'l=s','n=s', 'b=s', 'm=s', 'debug') or pod2usage(2);
if ( $opts{'man'} ) { pod2usage(-exitval => 0, -verbose => 2) } 
if ( $opts{'help'} ) { pod2usage(1) }
if ($opts{v}) {
    print "history.pl $version\n"; exit;
}

my $infile = shift;
unless ($infile) {die "history.pl [options] <filename>\n";}
my $burnin = defined $opts{b} ? $opts{b} : 1000; # burnin generation #
my $monitor = $opts{m} || 1000; # monitor #
my $good_num = $opts{n} || 3; # the number of files to be selected
my $runlevel = defined $opts{l} ? $opts{l} : 0;
my $debug = $opts{debug};

my $stem = $infile;
$stem =~ s/^(.+).nex$/$1/;
$stem =~ s/^(.+\/)?([^\/]+)$/$2/;
#print "$1 $stem\n"; exit;
$stem =~ s/[^a-zA-Z0-9.]/\./g; # remove non alphanumeric characters in name
my $dir = "$stem.dir";
system "mkdir $dir";
system "cp $infile $dir/$stem.nex";
$infile = "$stem.nex";
chdir "$dir";

my @data; # all monitored data in file
my ($llike, $alpha, $beta, $r, $k, @node);
my $maxlike = -1.E+20;
my $minfile = 0;
my $maxfile = 2;
my @like;
my $names; # name of each node in the index file

#print "$runlevel--$burnin--$monitor\n";exit;
if ($runlevel == 0) {
    exec_bugs();
    re_exec_bugs($minfile, $maxfile, 50, 50);
    rename "$stem.$maxfile.bug", "$stem.0.bug";
}
if ($runlevel < 2) {
    re_exec_bugs(0, 0, $burnin, $monitor);
}else {
    read_bugs_out("$stem.0.bugs1");
}
write_data(0);
modify_nexus();

exit;

# select runs not trapped locally
sub exec_bugs {
    $maxfile = 0;
    for (my $i = 0; $i < 20; $i++) {
	system ("bugs.pl $stem.$i $infile > $stem.$i.bug") == 0 or die "BUGS execution error!\n";
	system "bugs <$stem.$i.cmd >$stem.$i.bugs.log";
	read_bugs_out("bugs1");
	print "$i: ", $llike->[0], "\n" if $debug;
	if ( $alpha->[1] != 0 && abs($alpha->[0]/$alpha->[1]) < 1000 &&
	    $beta->[1] != 0 && abs($beta->[0]/$beta->[1]) < 1000 &&
	    $r->[1] != 0 && abs($r->[0]/$r->[1]) < 1000 &&
	    $k->[1] != 0 && abs($k->[0]/$k->[1]) < 1000 ) {
	    print "   move to $stem.$maxfile ", $llike->[0], "\n" if $debug;
	    rename "$stem.$i.bug", "$stem.$maxfile.bug";
	    $like[$maxfile] = $llike->[0];
	    $maxfile++;
	}
	if ($maxfile == $good_num) {last;}
    }
    if (--$maxfile == -1) {print "Error for file: $infile; no good starting was found\n"; exit; }
    print join(' ', @like), "\n" if $debug;
}

# select a run with maximum mean probability
sub re_exec_bugs {
    my ($bugs1, $bugs2, $burnin, $monitor) = @_;
#    if ($bugs1 == $bugs2) { die "-----Error: $stem\n"; }
#    my ($nchar, $nnodes) = read_node_num("$stem.$bugs1.bug");
#    $burnin = 1000; #int(sqrt(sqrt(sqrt($nchar*$nnodes+1))))*1000;
#    print "burnin: $burnin\n";
    foreach my $i ($bugs1..$bugs2) {
	print "$i running...\n" if $debug;
	system ("bugs.cmd.pl $stem.$i $burnin $monitor") == 0 or die "BUGS error: $infile\n";
	system "bugs <$stem.$i.cmd >$stem.$i.bugs.log";
	rename "bugs.ind", "$stem.$i.bugs.ind";
	rename "bugs.out", "$stem.$i.bugs.out";
	rename "bugs1.ind", "$stem.$i.bugs1.ind";
	rename "bugs1.out", "$stem.$i.bugs1.out";    
	read_bugs_out("$stem.$i.bugs1");
	select_file($i);
	$like[$i] = $llike->[0];
    }
    print join(' ', @like), "\n" if $debug;
}

# select the run to be used
sub select_file {
    my $i = shift;
    if ($llike->[0] > $maxlike) { $maxlike = $llike->[0]; $maxfile = $i; }
}

# read .out file
sub read_bugs_out {
    my $bugstem = shift;
    read_ind($bugstem);
    open (INFILE, "$bugstem.out") or die "Error open file\n";
	
    for (my $i = 1; my $line = <INFILE>; $i++) {
#	    print "$line\n"; exit;
	$line =~ s/^\s+//;
	${$data[$i]} = [split /\s+/, $line];
    }
    close (INFILE);
}

#read index file
sub read_ind {
    my $bugstem = shift;
    open(IND, "$bugstem.ind");
    while (my $line = <IND>) {
	$line =~ s/^s+//;
	my ($v1, $v2) = split /\s+/, $line;
	if (!$v1) { print "$line, -- $v1, $v2\n"; exit;}
	if ($v1 eq 'llike') {
	    $data[$v2] = \$llike;
	}elsif ($v1 eq 'r') {
	    $data[$v2] = \$r;
	}elsif ($v1 eq 'k') {
	    $data[$v2] = \$k;
	}elsif ($v1 eq 'alpha') {
	    $data[$v2] = \$alpha;
	}elsif ($v1 eq 'beta') {
	    $data[$v2] = \$beta;
	}elsif ($v1 =~ /^root/) {
	    my $name = $v1;
	    $name =~ /\[(\d+)\]/;
#	    print "$1\n";
	    $data[$v2] = \$node[0][$1]; # [intron#][first node--root], 
	}elsif ($v1 =~ /^node/) {
	    my $name = $v1;
	    $name =~ /\[(\d+),(\d+)\]/;
#	    print "$2, $1\n";
	    $data[$v2] = \$node[$1][$2]; # [intron#][other internal node]
	}
    }
    close(IND);
}

# write bugs out to a file
sub write_data {
    my ($file) = @_;
#    print &Dumper($llike); exit;
#    print scalar @{$node[1]}, "\n"; exit;
#    print &Dumper($node[0]);exit;

    open(OUTFILE, ">$stem.node.bugs.out");

    my ($nnodes, $nchar) = read_node_num("$stem.$file.bug");
    print OUTFILE "inodes\t", $nnodes, "\n";
    print OUTFILE "characters\t", $nchar, "\n";

    my @llike = map {$_->[0]} @{[$llike]};
    my @r = map {$_->[0]} @{[$r]};
    my @k = map {$_->[0]} @{[$k]};
    my @alpha = map {$_->[0]} @{[$alpha]};
    my @beta = map {$_->[0]} @{[$beta]};
    print OUTFILE "loglikelihood\t", join(' ', @llike), "\n";
    print OUTFILE "alpha\t", join(' ', @alpha), "\n";
    print OUTFILE "beta\t", join(' ', @beta), "\n";
    print OUTFILE "r\t", join(' ', @r), "\n";
    print OUTFILE "k\t", join(' ', @k), "\n";

    $names = read_node_name("$stem.$file.log");
    for (my $i = 0; $i < @node; $i++) {
	print OUTFILE $names->{$i}, "\t";
	for (my $j = 1; $j < @{$node[0]}; $j++) {
	    printf OUTFILE "%4.3f\t", $node[$i][$j][0]-1;
	}
	print OUTFILE "\n";
    }
    close(OUTFILE);
}

# convert internal node names used in bugs run
sub read_node_name {
    my $file = shift;
    open(LOG, "$file");
    my %names;
    $names{0} = 'root';
    while (<LOG>) {
	if (/^\s+$/) {last;}
    }
    while (<LOG>) {
	/(\d+):\s*(\S+)/;
	$names{$1} = $2;
    }
    close(LOG);
#    print &Dumper(%names);exit;
    return \%names;
}

# find out the number of parameters based on tree size and character num
sub read_node_num {
    my $file = shift;
    open(BUG, "$file");
    my ($nchar, $nnodes);
    while (<BUG>) {
	if (/nChar = (\d+)/) {$nchar = $1;}
	elsif (/nNode = (\d+)/) {$nnodes = $1+1;}
	elsif ($nchar && $nnodes) {last;}
    }
    close(BUG);
    return ($nnodes, $nchar);
}

# modifying input nexus file (with new name)
sub modify_nexus {
    my $nexus = new Bio::NEXUS($infile);
    create_history_block($nexus, \@node);
    modify_span_block($nexus);
    $nexus->write("../${stem}.h.nex");
}

# create history block based on BUGS output
sub create_history_block {
    my ($nexus, $nodes) = @_;
    my $otuset = $nexus->get_block('characters', 'intron')->get_otuset()->clone();

    my @otus;
    for (my $i = 0; $i < @$nodes; $i++) {
	my @seq;
	for (my $j = 1; $j < @{$nodes->[0]}; $j++) {
	    push @seq, [sprintf("%4.3f", 2-$nodes->[$i][$j][0]), (sprintf"%4.3f", $nodes->[$i][$j][0]-1)];
	}
	push @otus, new Bio::NEXUS::TaxUnit($names->{$i}, \@seq);
    }
#    @otus = sort { $a->get_name cmp $b->get_name } @otus;
    my $history = new Bio::NEXUS::HistoryBlock('History');
    foreach my $otu (@otus) {
	$otuset->add_otu($otu);
    }
    $history->set_title('intron');
    $history->add_link('taxa', $nexus->get_name);
    $history->set_format($nexus->get_block('characters', 'intron')->get_format());
    $history->set_otuset($otuset);
    $history->add_tree($nexus->get_block("trees")->get_tree);
    $nexus->remove_block('history', 'intron');
    $nexus->add_block($history);
}

# modify span block by adding BUGS parameters
sub modify_span_block {
    my $nexus = shift;
    my %descendant;
    $descendant{program}='BUGS';
    $descendant{version}='unix-0.600';
    my $like = $llike->[0];
    my $a = $alpha->[0];
    my $b = $beta->[0];
    $descendant{parameters}="loglikelihood=$like, alpha=$a, beta=$b, burnin=$burnin, monitor=$monitor";
    my $spanblock = $nexus->get_block("span");
    if (! $spanblock) {
	$spanblock = new Bio::NEXUS::SpanBlock('span'); 
	$spanblock->{method}{ancestral_inference} = \%descendant;
	$nexus->add_block($spanblock);
    }else {
	$spanblock->{method}{descendant} = \%descendant;
    }
}


################# POD Documentation ##################

__END__

=head1 NAME

history.pl

=head1 SYNOPSIS

history.pl [options] <input_file>

  Options:
    --help -h       brief help message
    --man           full documentation
    --debug         output running info
    -n <run-num>           the number of converging initial runs
    -l <running-level>           running level
       0          new run
       1          old run with new number of generations
       2          just retrieve data from existing files
    -b <burin-in> the number of burnin generations
    -m <monitor>  the number of monitoring generations
    -v            print version information and quit

=head1 Description

 create ancestral intron states based on an alignment and tree
 in a nexus file. The BUGS program will run many times with small
 number of generations to find those can converge (to global optimum)
 and then run one with a large number of generations (convergence)
 the ancestral states will be computed from the average value of 
 those generations and write back to file in history block

=head1 REQUIRES

BUGS must be installed for running this program.

=head1 VERSION

$Revision: 1.8 $

=head1 AUTHOR

Chengzhi Liang <liangc@umbi.umd.edu>

=cut

##################### End ##########################