#!/usr/bin/perl
# Author: Jason Stajich, jason@bioperl.org
# $Revision: 6576 $
use strict;
use Bio::PopGen::IO;
use Bio::PopGen::Statistics;
use Bio::PopGen::Population;
my $io = new Bio::PopGen::IO(-format => 'prettybase',
# the Bio::Root::IO->catfile is only
# to make file access platform independent
-file => Bio::Root::IO->catfile
(qw( t data popstats.prettybase)));
# This is an example of how to read in data from Bio::PopGen::IO
# We're going to make 2 lists, @outgroup, @ingroup
# @outgroup is a single individual which is named 'out'
# @ingroup is the set of individuals we are testing
my (@ingroup,@outgroup);
while( my $ind = $io->next_individual ) {
if($ind->unique_id =~ /out/) {
push @outgroup, $ind;
} else {
push @ingroup, $ind;
}
}
# We'll get the names of all the markers (or sites)
# that this individual has genotypes for
my @marker_names = $ingroup[0]->get_marker_names();
# the number of sites is the same as the number of markers
# we assume that all the individuals have the same number of sites
# or that this data is 'aligned' if these were derived from a
# multiple sequence alignment
my $sitecount = scalar @marker_names;
foreach my $ind ( @ingroup ) {
# here let's print out the individual name and all their alleles
# for all the markers
# like this
# Name: INDIVIDUALNAME
# A1,A2 B1,B2,...
print "Name: ", $ind->unique_id,"\n";
print "\t";
foreach my $marker ( @marker_names ) {
for my $genotype ( $ind->get_Genotypes($marker) ) {
my @alleles = $genotype->get_Alleles();
# In this example these are actually single alleles anyways...
print join(",", @alleles), " ";
}
}
print "\n";
# There is a more compact way to write that
print "Name: ", $ind->unique_id,
"\n\t", join(" ", map { join(",",$_->get_Alleles) }
map { $ind->get_Genotypes($_) } @marker_names),"\n";
print "--\n";
}
# We can compute some statistics about these individuals
# (underlying assumption is that they are unrelated...)
print "Pi: ",Bio::PopGen::Statistics->pi(\@ingroup), "\n";
print "Theta: ",Bio::PopGen::Statistics->theta(\@ingroup), "\n";
# we can also treat them like a population
my $ingroup_pop = new Bio::PopGen::Population(-individuals => \@ingroup);
print "Pi: ",Bio::PopGen::Statistics->pi($ingroup_pop), "\n";
print "Theta: ",Bio::PopGen::Statistics->theta($ingroup_pop), "\n";
# You can also simulate individuals from a coalescent
use Bio::PopGen::Simulation::Coalescent;
my $ssize = 5;
my $sim = new Bio::PopGen::Simulation::Coalescent(-sample_size => $ssize);
my $tree = $sim->next_tree;
my $mutcount = 100;
$sim->add_Mutations($tree, $mutcount);
# The leaves are the simulated individuals
my @leaves = $tree->get_leaf_nodes;
# We can use the Stats module either like Bio::PopGen::Statistics->XXX
# or like this:
my $stats = new Bio::PopGen::Statistics;
# $stats->verbose(1);
print "Coalescent pi: ", $stats->pi(\@leaves), "\n";
print "Coalescent theta: ", $stats->theta(\@leaves), "\n";
my $coalescent_pop = new Bio::PopGen::Population(-individuals => \@leaves);
print "Coalescent pi: ", $stats->pi($coalescent_pop), "\n";
print "Coalescent theta: ", $stats->theta($coalescent_pop), "\n";