The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
######################################################
# nextool.pl
######################################################
# 
# Thanks to Peter Yang and Chengzhi Liang for the original version 
#
# $Id: nextool.pl,v 1.42 2012/02/07 21:49:27 astoltzfus Exp $

# this script provides a set of functions for manipulating NEXUS files
# eg, select/exclude/rename OTUs, select character columns in OTUs
# select/exclude subtree, select/reroot tree

use strict;
use Pod::Usage;
use Carp;
use Data::Dumper;
$Data::Dumper::Maxdepth=3;
use Bio::NEXUS;
use Bio::NEXUS::Functions; 

my $infile = shift @ARGV;
unless ($infile) {die "Usage: nextool.pl <infile> [outfile] [command [options]]\n";}

if ($infile eq "-v") {
    die "nextool.pl \$Revision: 1.42 $\n"; 
}
if ($infile eq "-h" || $infile eq '--help') {
    pod2usage(-exitval => 0, -verbose => 1);
}

my $outfile = shift @ARGV;
my $command = shift @ARGV;
#if ($outfile eq "-") {$outfile = \*STDOUT;}
if ($outfile =~ /(^-$|^STDOUT$)/) {$outfile = \*STDOUT;}
if (! $outfile || ! $command || $command eq "rewrite") { 
    my $nexus = new Bio::NEXUS($infile, 1);
    $nexus->write($outfile||"temp.nex", 1);
    exit;
}

my $nexus = new Bio::NEXUS($infile);

if ($command eq "rename_otus") {
    $nexus = rename_otus($nexus, @ARGV);
    $nexus->write($outfile);
}elsif ($command eq "select") {
    my $by = shift @ARGV;
    if ($by =~ /^block/i) { $nexus = selectbyblock($nexus, @ARGV); }
    elsif ($by =~ /^otu/i) { $nexus = selectbyotu($nexus, 1, @ARGV); }
    elsif ($by =~ /^subtree/i) { $nexus = selectbyinode($nexus, @ARGV); }
    elsif ($by =~ /^tree/i) { $nexus = selectbytree($nexus, @ARGV); }
    elsif ($by =~ /^column/i) { $nexus = selectbycolumn($nexus, 1, @ARGV); }
    elsif ($by =~ /^set/i) { $nexus = selectbysets($nexus, 1, @ARGV); }
    $nexus->write($outfile);
}elsif ($command eq "exclude") {
    my $by = shift @ARGV;
    if ($by =~ /block/i) { $nexus = exclude_blocks($nexus, @ARGV); }
    elsif ($by =~ /otu/i) { $nexus = selectbyotu($nexus, 0, @ARGV); }
    elsif ($by =~ /subtree/i) { $nexus = exclude_subtree($nexus, @ARGV); }
    elsif ($by =~ /^column/i) { $nexus = selectbycolumn($nexus, 0, @ARGV); }
    elsif ($by =~ /^set/i) { $nexus = selectbysets($nexus, 0, @ARGV); }
    $nexus->write($outfile);
}elsif ($command eq "reroot") {
    $nexus = reroottree($nexus, @ARGV);
    $nexus->write($outfile);
} elsif ($command eq "addtree") {
    &addtree($nexus,@ARGV);
    $nexus->write($outfile);
} elsif ($command =~ /^makesets?$/) {
    my $by = shift @ARGV;
    if ($by =~ /(?:file|^f$)/i) {    $nexus = setsbyfile($nexus, @ARGV); }
    elsif ($by =~ /(?:inode|^i$)/i) {    $nexus = setsbyinode($nexus, @ARGV); }
    elsif ($by =~ /(?:otu|^o$)/i) {    $nexus = setsbyotus($nexus, @ARGV); }
    elsif ($by =~ /(?:clade|^c$)/i) {    $nexus = setsbyclade($nexus, @ARGV); }
    elsif ($by =~ /(?:union|^u$)/i) {    $nexus = setsbyunion($nexus, @ARGV); }
    elsif ($by =~ /(?:difference|^d$)/i) {    $nexus = setsbydifference($nexus, @ARGV); }
    elsif ($by =~ /(?:charstate|^cs$)/i) {    $nexus = setsbycharstate($nexus, @ARGV); }
    elsif ($by =~ /(?:cladeconsensus|^cc$)/i) {    $nexus = setsbycladeconsensus($nexus, @ARGV); }
    else {die "Expecting command that looks like 'makesets <bymode> <arguments>', where bymode is one of: file, inode, otu, clade, union, difference, charstate, cladeconsensus\n"};
    $nexus->write($outfile);
} elsif ($command =~ /^removesets?$/) {
    $nexus = removesets($nexus, @ARGV);
    $nexus->write($outfile);
} elsif ($command =~ /^renamesets?$/) {
    $nexus = renamesets($nexus, @ARGV);
    $nexus->write($outfile);
} elsif ($command =~ /^listsets?$/) {
    file_overwrite_warning($infile, $outfile, $command, 0);
    listsets($nexus, $outfile, @ARGV);
} elsif ($command =~ /^listsetnames?$/) {
    file_overwrite_warning($infile, $outfile, $command, 0);
    listsetnames($nexus, $outfile);
} else {
    die "unknown command: $command\n";
}

exit;



######### SUBROUTINES #########################

# rename OTUs
# 
sub rename_otus {
    my ($nexus, $transfile) = @_;
    open(FILE, "<$transfile") or die "ERROR: Can\'t open $transfile\n";
    my $lines; 
    while ( $_ = <FILE> ) {
    	s/^#.*$//;
    	next if /^\s*$/;
    	$lines .= $_; 
    }
	close( FILE ); 
    my %translation = @{ &_parse_nexus_words( $lines ) }; 
    return $nexus->rename_otus(\%translation);
}

# select a subset of blocks
sub selectbyblock {
    my ($nexus, @blocks) = @_;
    return $nexus->select_blocks(\@blocks);
}

# select subset of OTUs 
sub selectbyotu {
    my ($nexus, $mode, @args) = @_;
    die "need otu names" unless (@args);
    my @otus;
    if ($args[0] eq '-f') { # input from file
        my $file = $args[1];
        open(FILE, $file) or carp "File $file not found\n";
        my @lines = <FILE>;
        @otus = split /\s+/, "@lines";
        close(FILE);
    }else {   # input from command line separated by space
        my $list = join( " ", @args ); 
        $list = unquote( $list ); 
        $list =~ s/^\s+|\s+$//g; 
        @otus = split( /[,\s]\s*/, $list ); 
    }
    if ( $mode == 1 ) { # select mode 
        return $nexus->select_otus(\@otus); 
    }
    else { # exclude mode 
        return $nexus->exclude_otus(\@otus); 
    }
}

# select a tree
sub selectbytree {
    my ($nexus, $treename) = @_;
    ($treename) or die "ERROR: Need to specify a tree to be selected\n";
    return $nexus->select_tree($treename);
}

# select a subtree
sub selectbyinode {
    my ($nexus, $nodename, $treename) = @_;
    $nodename or die "ERROR: Need to specify an internal node for subtree\n";
    
    return $nexus->select_subtree($nodename, $treename);
}

# use $command=1 for select and $command=0 for exclude
# arguments (column numbers) can be of form: 
# 1) 1-3 4 5 6-10 or 1-3, 4  5, 6-10
# 2) -f <file name> contains numbers in format as examplified in 1)  
sub selectbycolumn {
    my ($nexus, $command, @args) = @_;
    my $args = "@args";
    $args =~ s/title\s*=\s*(\w+)//i;    
    my $title = $1;
#    my $block = $nexus->get_block("characters", $title);
    
    die "need column numbers" unless $args;
    
    my $columns;
    if ($args =~ /-f (\S+)/) { # input from file
        my $file = $1;
        $columns = do{ local(@ARGV, $/) = $file; <>};
        $columns =~ s/\n/ /g;
    }else {   # input from command line separated by comma or space
        $columns = $args;
    }
    
    my @columns = @{ &parse_number($columns) };
    if ($command) {
        return $nexus->select_chars(\@columns, $title);
    }else {
        return $nexus->exclude_chars(\@columns, $title);
    }
}

sub selectbysets {
    my ($nexus, $mode, @setnames) = @_;    
    my @otus;
    die "Provide set names as arguments\n" unless (@setnames);
    for my $setname (@setnames) {
        push(@otus, @{ $nexus->get_block('sets')->get_taxset($setname) } );
    }

    if ( $mode == 1 ) { # "select" mode 
        return $nexus->select_otus(\@otus); 
    } else { # "exclude" mode 
        return $nexus->exclude_otus(\@otus); 
    }
}

# parse numbers in format "1-3, 4 6 8-10"
sub parse_number {
    my $s = unquote( shift );
    
    if (! $s =~ /^\s*(\d+(-\d+)?)([,\s]\s*\d+(-\d+)?)*\s*$/ ) { 
        die "Invalid number format.  Use 1 or 1-3 or 1, 3, 5-8 or 1 3 5 6-10.\n"; 
    } 
    $s =~ s/^\s+|\s+$//g;
    $s =~ s/,?\s+/,/g;  # use ',' as separator
    my @cols = split(/,/, $s);
    
    my @arr;
    foreach my $item (@cols) {
        if ($item =~ /-/) { # eg 1-3
            $item =~ /([0-9]+)\s*-\s*([0-9]+)/;
            for (my $i = $1; $i <= $2; $i++) { push ( @arr, $i-1 ); }
        } elsif ($item =~ /^\d+$/) { # eg 4
            push ( @arr, $item-1 );
        } elsif ($item) {
            die "non-number was used for column number\n";
        }
    }

    @arr = sort {$a<=>$b} @arr;
    return \@arr;
}

sub unquote { 
    my $string = shift; 
    $string =~ s/^ *'(.*)' *$/$1/;
    $string =~ s/^ *"(.*)" *$/$1/;
    return( $string ); 
}

sub exclude_blocks {
    my ($nexus, @blocks) = @_;
    return $nexus->exclude_blocks(\@blocks);
}

sub exclude_subtree {
    my ($nexus, $subtree) = @_;
    return $nexus->exclude_subtree($subtree);
}

sub reroottree {
    my ($nexus, $outgroup, $root_position, $treename) = @_;
    if ( $nexus->get_block('trees')->get_tree('$root_position') ) { #in case no root position is supplied, but a tree name is
        ($root_position, $treename) = (undef, $root_position);
    }
    return $nexus->reroot($outgroup, $root_position, $treename);
}

sub addtree {
    my ($nexus,$treeFile,$treeName) = @_;

    if ( !$treeName ) { ($treeName = (split("/",$treeFile))[-1]) =~ s/(.*)\..*/$1/; }

    if ( -f $treeFile ) {
        open(TF,"<$treeFile") || warn("ERROR: could not open $treeFile. No tree data will appear in NEXUS file.");

        my ($treeString);
        while(<TF>) {
            s/\s*//go;
            $treeString .= $_;
        }

        close(TF);

        $treeString =~ s/(.*);$/$1/; #remove semicolon from end of $treeString, if there is one.

        #$treeString must be formatted like this in order to work w/ Bio::NEXUS::TreesBlock::new -
        #      tree  $treeName = $treeString
        #the following manipulations ensure we end up w/ such a string to pass to create_block
        $treeString =~ s/^tree(.*)$/$1/;
        unless ( $treeString =~ m/^$treeName=/ ) {
            $treeString = $treeName . " = " . $treeString;
        }
        $treeString = "tree " . $treeString;

        print STDERR "tree string:\n$treeString\n";
        $nexus->add_block( $nexus->create_block('trees',$treeString, 1) );
    } else {
        die("Tree file: \"$treeFile\" not found or could not be read");
    }
}

sub setsbyfile {
    my ($nexus, @setfiles) = @_;
    my $sets;

    for my $setfile (@setfiles) {
        open (SF, "<$setfile") || die ("Could not open set file $setfile.\n\n");
        while (<SF>) {
            chomp;
            push (@{$$sets{$setfile}}, $_);
        }
        close (SF);
    }
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbyinode {
    my ($nexus, @inodenames) = @_;
    my $sets;
    
    for my $inodename (@inodenames) {
        unless ($nexus->get_block('trees')->get_tree()->find($inodename)) {
            die "User-provided inode <$inodename> is not the name of an inode in this NEXUS file\nCommand aborted\n\n";
        }
        my $subtree = $nexus->select_subtree($inodename);
        my $otus = $subtree->get_otus();
        $$sets{$inodename} = $otus;
    }
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbyclade {
    my ($nexus, @clade_elements) = @_;
    my ($sets, $treename);# $treename is currently a dummy variable, but we should implement it

    unless (validate_otus($nexus, [@clade_elements])) {die "User-provided OTUs failed validation against TAXA Block\n\n"}

    while (scalar(@clade_elements) > 0) {
        my ($otu1, $otu2) = splice(@clade_elements, 0, 2);
        unless (defined $otu2) {die "You have supplied an odd number of OTU's.  This mode requires pairs.\n\n"}
         $otu1 = $nexus->get_block('trees')->get_tree($treename)->find($otu1);
         $otu2 = $nexus->get_block('trees')->get_tree($treename)->find($otu2);
        my $inode_name = $otu1 -> mrca($otu2, $treename) -> name();
        my $otus = $nexus->select_subtree($inode_name)->get_otus();
        $$sets{$inode_name} = $otus;
    }
    
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbyotus {
    my ($nexus, @otuargs) = @_;
    my $otuargs = join(" ", @otuargs);
    my $sets;
    
    my @setsandelements = $otuargs =~ /\s*([^=]+)\s*=\s*\[\s*([^\]]+)\s*\]/g;
    
    while (@setsandelements) {
        $$sets{shift(@setsandelements)} = [split(/\s/, splice(@setsandelements, 1, 1))];
    }
    
    my $validation = 1;
    for my $users_otus (values %$sets) {
        $validation = validate_otus($nexus, $users_otus);
    }

    unless ($validation == 1) {die "User-defined sets failed validation against TAXA Block\n\n"}
    
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbyunion {
    my ($nexus, @setargs) = @_;
    my $setargs = join(" ", @setargs);
    my $sets = $nexus -> get_block('sets') -> get_taxsets();
    
    my @setsandelements = $setargs =~ /\s*(\b[^=]+\b)\s*=\s*\[\s*([^\]]+)\s*\]/g;
    while (@setsandelements) {
        my ($newsetname, @subsets) = (shift(@setsandelements), split(/(?:\s+|\s*\+\s*)/, shift(@setsandelements)));
        my @otus;
        for my $subset (@subsets) {
            push(@otus, @{$nexus -> get_block('sets') -> get_taxset($subset)});
        }
        $$sets{$newsetname} = [@otus];
    }
    
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbydifference {
    my ($nexus, @setargs) = @_;
    my $setargs = join(" ", @setargs);
    my $sets = $nexus -> get_block('sets') -> get_taxsets();
#    print Dumper ($sets);
    
    @setargs = $setargs =~ /\s*([^=\s]+)\s*=\s*\[\s*([^-\s\]]+)\s*-\s*([^-\s\]]+)\s*\]/g;
    my (@difference_set_names, @minuend_set_names, @subtrahend_set_names);
    while (@setargs) {
        push(@difference_set_names, shift (@setargs));
        push(@minuend_set_names, shift (@setargs));
        push(@subtrahend_set_names, shift (@setargs));
    }
    
    for (my $i = 0; $i < @difference_set_names; $i++) {
        $$sets{$difference_set_names[$i]} = [@{$$sets{$minuend_set_names[$i]}}];
        for (my $j = 0; $j < @{$$sets{ $difference_set_names[$i] }}; $j++ ) {
            for my $subtrahendOTU ( @{$$sets{ $subtrahend_set_names[$i] }} ) {
                if ( ${$$sets{ $difference_set_names[$i] }}[$j] eq $subtrahendOTU) {
                    splice ( @{$$sets{ $difference_set_names[$i] }}, $j, 1);
                }
            }
        }
    }
    
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub setsbycharstate {
    my ($nexus, @char_args) = @_;
    my ($proposed_sets, $title) = &parse_charstate_args(@char_args);
    my ($sets, @column_labels);
    my $block = $nexus->get_block("characters", $title);
    
    my $character_labels = $block->get_charlabels;
    my $seq_length = $block->get_nchar;
    my %char_matrix = %{$block->get_otuset->get_otu_sequences};
    if ($character_labels && @$character_labels) { 
        @column_labels = @$character_labels; 
    } elsif ($seq_length) {
        for (1 .. $seq_length) {push @column_labels, $_;}
    }
    my %column_indices;
    for (my $i = 0; $i < @column_labels; $i++) {
        $column_indices{$column_labels[$i]} = $i;
    }
    foreach my $proposed_setname (keys %{$proposed_sets}){
        while (@{$$proposed_sets{$proposed_setname}} > 0 ) {
            my ($column_label, $query_value) = splice(@{$$proposed_sets{$proposed_setname}}, 0, 2);
            foreach my $otu (keys %char_matrix) {
                my $locus_value = substr($char_matrix{$otu},$column_indices{$column_label},1);
                if ($query_value =~ m/$locus_value/) {
                    push (@{$$sets{$proposed_setname}}, $otu);
                }
            }
        }
    }
    
    &add_or_create_set ($nexus, $sets);
    return $nexus;
}

sub parse_charstate_args {
    my $char_args = join (" ", @_);
    $char_args =~ m/title\s*=\s*(\w+)/i;    
    my $title = $1;                                                # store the captured title name, if one was provided
    my $num_title_matches = $char_args =~ s/title\s*=\s*\w+//g;    # make sure the user only referred to one title (to prevent ambiguity)
    if ($num_title_matches > 1 ) {die ("Too many 'title' arguments--please reference only one Characters Block\n\n")}
                # the following line matches each set as defined by the user and stores it in the @proposed_sets array
                # basically, it looks for bracket pairs that look like foo[bar], 
                # which may or may not be preceded by a set name declaration ('set_name = ')
    my @proposed_sets = $char_args =~ m/(?:[-\w]+\s*=\s*)?\s*[-\w]+\s*\[[^\]]+\]\s*/g;
    my %proposed_sets;
    for my $proposed_set (@proposed_sets) {                     # for each of the matches made above
        my $set_name;
        if ($proposed_set =~ m/^\s*([^\s\[\]]+)\s*=/) {            # if a name declaration was matched
            $set_name = $1;                                        # use that as the set name
            $proposed_set =~ s/^\s*[^\s\[\]]+\s*=\s*//;            # then, get rid of that portion of the array element
        } else {                                                # otherwise, make up a unique set name by joining the column label and locus value(s)
            $set_name = join("", ($proposed_set =~ m/[^"\s\[\]]/g));
        }
        my ($column_label, $locus_value);
        while ($proposed_set =~ m/[^"\s]/) {                    # as long as $proposed_set contains more than spaces and quotes
            ($column_label) = $proposed_set =~ m/[^"\s\[]+/;    # match the column label
            $proposed_set =~ m/\[([^\]]+)\]/;                    # and match the locus value(s)
            $locus_value = $1;
            $locus_value =~ s/[\s,]//g;                            # get rid of any spaces or commas the user may have separated the locus values with
            push (@{$proposed_sets{$set_name}}, $column_label, $locus_value); # push the column label and locus value(s) onto this sets array in the set hash
            $proposed_set =~ s/^[^\]]*\]//;                        # destroy the evidence
        }
    }
    return (\%proposed_sets, $title);
}

sub setsbycladeconsensus {
    my ($nexus, @args) = @_;
    my $consensus_sets = {};
    my $treename;
    my $args = join(' ', @args);
    $args =~ m/title\s*=\s*(\w+)/i;
    my $title = $1;
    if ($title) {$args =~ s/\s*title\s*=\s*\w+\s*//i;}
    if ($args =~ /(\w+)/) {$treename = $1;}
    my $char_block = $nexus->get_block("characters", $title);
    my %otunames_sequences = %{$char_block->get_otuset->get_otu_sequences};
    my $tree = $nexus->get_block('trees')->get_tree($treename)->clone();
    my $rootnode = $tree->get_rootnode();
    &set_consensus_seqs($rootnode, \%otunames_sequences);
    ($rootnode, $consensus_sets) = &simplify_tree($rootnode, $consensus_sets);
    $tree->set_name("nonredundant" . $tree->name());
    $nexus->get_block('trees')->add_tree($tree);
    &add_or_create_set ($nexus, $consensus_sets);
    return $nexus;
}

sub set_consensus_seqs {
    my ($node, $otus_seqs) = @_;
    my @children = @{ $node->children() };
    for my $child (@children) {
        if ( $child->is_otu() ) {
            $child->set_seq(${$otus_seqs}{$child->name()});
            if ( ! $node->get_seq() || ( $node->get_seq() eq $child->get_seq() ) ) {
                $node->set_seq($child->get_seq());
            } else {
                $node->set_seq('NO_CONSENSUS');
            }
        } else {
            &set_consensus_seqs($child, $otus_seqs);
            if ( ! $node->get_seq() || ( $node->get_seq() eq $child->get_seq() ) ) {
                $node->set_seq($child->get_seq());
            } else {
                $node->set_seq('NO_CONSENSUS');
            }
        }
    }
}

sub simplify_tree {
    my ($node, $consensus_sets) = @_;
    my $setname;
    if ($node->get_seq() ne 'NO_CONSENSUS' && ! $node->is_otu() ) {
        my @otu_descendents = @{ $node->get_otus() };
        $setname = $otu_descendents[0]->name() . "_GROUP";
        ${ $consensus_sets }{$setname} = [];
        for my $descendent (@otu_descendents) {
            push(@{${$consensus_sets }{$setname}}, $descendent->name());
        }
        my $newlength = $node->distance($otu_descendents[0]) + $node->length();
        my $parent = $node->get_parent();
        my $siblings = $node->get_siblings();
        $parent->set_children($siblings);
        $parent->adopt($otu_descendents[0],0);
        $otu_descendents[0]->set_length($newlength);
    } else {
        for my $child (@{ $node->children() }) {
            simplify_tree($child, $consensus_sets);
        }
    }
    return ($node, $consensus_sets);
}

sub validate_otus { #Verifies that user-specified OTU names are taxlabels in the TAXA block
    my ($nexus, $users_otus) = @_;
    my $validation = 1;
    for my $users_otu (@$users_otus) {
        unless (defined ($nexus->get_block('taxa')->is_taxon($users_otu, 1))) {
            $validation = 0;
        }
    }
    return $validation;
}

sub add_or_create_set {
    my ($nexus, $sets) = @_;
    my $setsblock;
    if ($nexus -> get_block('Sets')) {
        $nexus -> get_block('Sets') -> add_taxsets($sets);
    } else {
        $setsblock = Bio::NEXUS::SetsBlock -> new('Sets');
        $setsblock -> set_taxsets($sets);
        $nexus -> add_block($setsblock);
    }
    return $nexus;
}

sub removesets {
    my ($nexus, @usersets) = @_;
    my $failure = 0;
    my $sets_to_remove;
    my $setsblock = get_setsblock_if_setsblock($nexus);
    my $taxsets = $setsblock->get_taxsets();
    unless (scalar(keys %$taxsets) > 0) {die "The SETS Block is already empty\n\n";}

    if ($usersets[0] =~ /^-p$/i) {
        shift @usersets;
        for my $userset (@usersets) {
            my $match = 0;
TAXSET:        for my $taxset (keys %$taxsets) {
                if ($taxset =~ /^$userset$/) {
                    push(@$sets_to_remove, $taxset);
                    $match = 1;
                    next TAXSET;
                }
            }
            if ($match == 0) {
                warn "<$userset> does not match any sets in this NEXUS file\n";
                $failure = 1;
            }
        }
    } else {
         for (my $i=0; $i < scalar(@usersets); $i++) {
             if ($$taxsets{$usersets[$i]}) {
                push (@$sets_to_remove, $usersets[$i]);
             } else { 
                 warn "<$usersets[$i]> is not the name of a set in this NEXUS file.\n";
                 $failure = 1;
             }
         }
    }
    die "Command aborted\n\n" if $failure == 1;
    $nexus->get_block('sets')->delete_taxsets(@$sets_to_remove);
    return $nexus;
}

sub renamesets {
    my ($nexus, @old_and_new) = @_;
    my $setsblock = get_setsblock_if_setsblock($nexus);
    $setsblock->rename_taxsets(@old_and_new);
    return $nexus;
}

sub listsets {
    my ($nexus, $outfile, @sets_to_list) = @_;
    my $setsblock = get_setsblock_if_setsblock($nexus);
    if (@sets_to_list > 0) {
        my $fh = open_filehandle($outfile);
        for (@sets_to_list) {
            my @otus = @{ $setsblock->get_taxset($_) };
            if (@otus == 0) {
                warn("$_ = []    Possible error: Set is empty or does not exist\n\n");
            } elsif (@otus > 0) {
                print $fh "$_ = [@otus]\n\n";
            }
        }
    } else {
        $setsblock->print_all_taxsets($outfile);
    }
}

sub listsetnames {
    my ($nexus, $outfile) = @_;
    my $setsblock = get_setsblock_if_setsblock($nexus);
    my @setnames = @{ $setsblock->get_taxset_names() };
    my $fh = open_filehandle($outfile);
    print $fh "@setnames\n";
}

sub open_filehandle {
    my ($fh) = @_;
    if ($fh eq "-" || $fh eq \*STDOUT) {return \*STDOUT}
    else {open(FH, ">$fh") || die "Could not open $outfile for writing\n\n"}
    return \*FH;
}

sub get_setsblock_if_setsblock {
    my ($nexus) = @_;
    my $setsblock;
    unless ($setsblock = $nexus->get_block('sets')){die "This NEXUS file does not contain a SETS Block\n\n"}
    return $setsblock;
}

sub file_overwrite_warning {
    my ($infile, $outfile, $command, $silentmode) = @_;
    if ($silentmode == 1) {return 1}
    if ($infile eq $outfile) {
        warn("Do you really want to overwrite NEXUS file $infile with the $command output?\n");
        if (<STDIN> =~ /y/) {
            return 1;
        } else { die("Command aborted\n\n")}
    } else { return 1;}
}

################# POD Documentation ##################

__END__

=head1 NAME

nextool.pl - Command-line NEXUS file manipulation tool

=head1 DESCRIPTION

Command-line Bio::NEXUS manipulation program

=head1 SYNOPSIS

nextool.pl <input_file> [output_file] [COMMAND [arguments]]

if outfile is not specified, just read input file and output to temp.nex
    
Commands:

    rewrite
    rename_otus <translation_file>
    reroot <outgroup_node_name> [tree_name]
    select blocks <block1 block2 ...>
    select OTU <OTU_1 OTU_2 ...> or <-f filename>
    select tree <tree_name>
    select subtree <inode_number> [tree_name] 
    select column <columns> or <-f filename>
    exclude blocks <block1 block2 ...>
    exclude OTU <otu1 otu2 ...>
    exclude subtree <inode> [treename]
    exclude column <columns> or <-f filename>
    makesets byfile <file1> [file2 file3 ...]
    makesets byinode <inode1> [inode2 inode3 ...]
    makesets byclade <OTU1> <OTU2> [OTU3 OTU4 ...]
        Square brackets are required syntax in this command:
    makesets byotus <set1>=[<OTU1 OTU2 ...>] <set2>=[<OTU3 OTU4 ...>] ...
        Square brackets are required in this command:
    makesets byunion <set1>=[<setA + setB ...>] <set2>=[<setA + setC ...>] ...
        Square brackets are required in this command:
    makesets bydifference <set1>=[<setA - setB>] <set2>=[<setA - setC>] ...
        Square brackets around 'state' argument are required in this command:
    makesets bycharstate [title=char_block_title] [set1=]"<sequence_or_intron_position>[<state>]" [set2=...]
    makesets bycladeconsensus [title=char_block_title]
    removesets <set1 set2 ...>
    listsets [set1 set2 ...]
    listsetnames
    renamesets oldname1 newname1 [oldname2 newname2 ...]


=head1 COMMANDS

=head2 rewrite

Writes the contents of <input_file> to <output_file>.  This is used to standardize the 
format of the file.  this is the default program action if <outfile> is not specified, the output file will be temp.nex

=head2 rename_otus <translation_file>

Renames OTUs (some or all) in taxa, characters and trees blocks according to translation_file. Each non-blank, non-comment (#) line of translation_file must contain oldname whitespace newname.  

=head2 reroot <outgroup_node_name> [tree_name]

reroot the tree of tree_name (optional-- the first tree in trees block if not specified) with outgroup_node_name as the new outgroup

=head2 select blocks <block1 block2 ...>

Select block given in list

=head2 select OTU <OTU_1 OTU_2 ...> or <-f filename>

Selects OTUs given in list. Changes taxa block, characters block and trees block.

=head2 select tree <tree_name>

Selects one tree given the tree name. Changes taxa/characters blocks to match.

=head2 select subtree <inode_number> [tree_name] 

Selects subtree given the tree name and the internal node number. Changes taxa/characters blocks to match.

=head2 select column <columns> or <-f filename>

Selects a number of columns from character lists to get a new set of character lists (for a new Bio::NEXUS file). eg, "1-3, 5 8, 10-15" (comma or space can be used to separate numbers)

=head2 exclude blocks <block1 block2 ...>

Remove blocks from file

=head2 exclude OTU <otu1 otu2 ...>

Remove OTUs from file

=head2 exclude subtree <inode> [treename]

Remove subtree rooted with 'inode' in 'treename' or the first tree if 'treename' is not specified

=head2 exclude column <columns> or <-f filename>

Remove a number of columns from character lists to get a new set of character lists (for a new Bio::NEXUS file). eg, "1-3, 5 8, 10-15" (comma or space can be used to separate numbers)

=head2     makesets byfile <file1> [file2 file3 ...]

Add sets based on OTU's listed in simple text files.  Sets take names of files; OTU's should be newline-delimited in the text files.

=head2 makesets byinode <inode1> [inode2 inode3 ...]

Add sets based on ancestral internal nodes and their children

=head2 makesets byclade <OTU1> <OTU2> [OTU3 OTU4 ...]

Add sets by finding the children of the most recent common ancestor (mrca) of OTU1 and OTU2 pairs

=head2 makesets byotus <set1>=[<OTU1 OTU2 ...>] <set2>=[<OTU3 OTU4 ...>] ...

Add sets by specifying setnames and OTU's each set is to contain.  Syntax is setname=[<OTU LIST>]

=head2 makesets byunion <set1>=[<setA + setB ...>] <set2>=[<setA + setC ...>] ...

Add sets by specifying setnames and which existing sets contain the OTUs the new set will comprise.  Syntax is setname=[<SET LIST>]

=head2 makesets bydifference <set1>=[<setA - setB>] <set2>=[<setA - setC>] ...

Add sets by specifying setnames and which existing sets contain the OTUs that will be used in defining the new sets.

=head2 makesets bycharstate [title=char_block_title] [set1=]"<sequence_or_intron_position>[<state>]" [set2=...]

Add sets by specifying setnames and which existing sets contain the OTUs that will be used in defining the new sets.

=head2 makesets bycladeconsensus [title=char_block_title]

Add sets by finding clades that have a consensus sequence at all loci, and creating one set for each group of "otu synonyms"

=head2 removesets [-p] set1 set2 ...

Delete sets from NEXUS file.  Will empty SETS Block but not remove it from file. '-p' switch allows you to specify regular expression patterns ('set\d*' will delete all sets of the form set<number>.)  QUOTATION MARKS AROUND PATTERNS IS STRONGLY RECOMMENDED.

=head2 listsets

Print sets (all sets by default, otherwise those provided by user) to output file, or standard out of STDOUT or - is used as output filename

=head2 listsetnames

Print setnames to output file, or standard out if STDOUT or - is used as output filename

=head2 renamesets oldname1 newname1 [oldname2 newname2 ...]

Rename sets by space-delimited <oldname newname> pairs

=head1 DESCRIPTION

B<This program> provides several services in the manipulation of NEXUS files (selecting specific OTUs or tree nodes, combining files, renaming OTUs, etc.

=head1 VERSION

$Revision: 1.42 $

=head1 REQUIRES

Bio::NEXUS

=head1 AUTHOR

Chengzhi Liang <liangc@umbi.umd.edu>
Peter Yang <pyang@rice.edu>
Tom Hladish <hladish@umbi.umd.edu>

=cut

##################### End ##########################