The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
#
# Copyright (C) 2004 Jörg Tiedemann  <joerg@stp.ling.uu.se>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# $Id$
#
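# summarizes recall, precision and F from evaluation files
# (default: all files in the current directory whose name contains ".eval")
#
# usage: summarize-eval [OPTIONS] [FILES]
#
#  OPTIONS:  -s sort-key ............. specify sorting key (default: F)
#                                      F ......... F value, highest first
#                                      mtime ..... modification time,
#                                                  oldest first
#                                      correct ... correct links, most first
#                                      partial ... partial links, most first
#  FILES: evaluation files to be checked (plain or gzipped, *.gz)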
#


use File::stat;


# Command line handling.
#
# Leading arguments that start with '-' are consumed as options:
#   -s KEY   sort key of the report (mtime, correct, partial; every other
#            value is handled like the default F)
# Any other option is dropped silently.  The remaining arguments are the
# evaluation files; if there are none, every entry of the current directory
# whose name contains ".eval" is used instead.
my $sort='F';
while (@ARGV and $ARGV[0]=~/^\-/){
    my $opt=shift(@ARGV);
    if ($opt=~/^\-s/){
        # "-s" as the very last argument has no key: keep the default
        # instead of leaving $sort undefined
        $sort=shift(@ARGV) if @ARGV;
    }
}
my @files=@ARGV;
if (not @files){
    # fail loudly instead of reporting "0 files found" for an unreadable dir
    opendir(my $dir, '.') or die "cannot read current directory: $!\n";
    @files = grep { /\.eval/ } readdir($dir);
    closedir $dir;
}


# Collect the figures of every evaluation file in %s.
#
# %s maps a shortened file name (with "links.", ".xml", ".eval" and ".gz"
# removed) to a hash with the keys
#   N ............................ size of the gold standard
#   R, P, F ...................... recall, precision and F (percent values)
#   correct, partial, incorrect .. counts taken from the three "links:" lines
#   mtime ........................ modification time of the file
# Apart from mtime, a key is only set when the corresponding line is found
# at its expected position in the result section.
my %s;

foreach my $f (@files){
    my $path=$f;
    chomp $path;
    next if not -f $path;
    my $filestat=stat($path);
    my @result=_read_gold_standard_section($path);

    # report name: the file name without its usual affixes
    my $name=$path;
    foreach my $affix ('links.', '.xml', '.eval', '.gz'){
        $name=~s/\Q$affix\E//g;
    }

    my $values=_parse_eval_section(@result);
    @{ $s{$name} }{ keys %$values } = values %$values;
    $s{$name}{mtime}=$filestat->mtime;
}

# Return the result section of an evaluation file: every line containing
# 'size of gold standard:' plus the 13 lines after it (what
# "grep -A13 'size of gold standard:'" prints).  Files ending in .gz are
# decompressed with gzip.  The file name is never passed through a shell,
# so blanks and shell metacharacters in it are harmless.
# Returns the empty list if the file cannot be read.
sub _read_gold_standard_section {
    my ($file) = @_;
    my $fh;
    if ($file=~/\.gz$/){
        # list form of a pipe open: gzip is started directly, without a shell
        if (not open($fh, '-|', 'gzip', '-cd', '--', $file)){
            warn "cannot run gzip on $file: $!\n";
            return;
        }
    }
    else{
        if (not open($fh, '<', $file)){
            warn "cannot open $file: $!\n";
            return;
        }
    }
    my @section=_grep_after($fh, qr/size of gold standard:/, 13);
    close $fh;
    return @section;
}

# Minimal stand-in for "grep -A$after": returns every line read from $fh
# that matches $pattern together with the $after lines following it.
# Groups of lines that are not adjacent in the input are separated by a
# "--" line, as grep does.
sub _grep_after {
    my ($fh, $pattern, $after) = @_;
    my @out;
    my $owed   = 0;    # context lines still to copy after the latest match
    my $lineno = 0;    # number of the current input line
    my $copied = 0;    # number of the last input line copied to @out
    while (defined(my $line = <$fh>)){
        $lineno++;
        my $hit = ($line =~ $pattern) ? 1 : 0;
        next unless $hit or $owed > 0;
        push @out, "--\n" if @out and $lineno > $copied + 1;
        push @out, $line;
        $copied = $lineno;
        $owed   = $hit ? $after : $owed - 1;
    }
    return @out;
}

# Extract the figures from the lines of a result section.  Each value is
# looked up on one fixed line: the first line for N, all others counted
# from the end of the section.  Returns a reference to a hash that holds
# only the keys whose line matched.
sub _parse_eval_section {
    my @result = @_;
    # [ key, index into @result, pattern capturing the value ]
    my @fields = (
        [ N         =>  0, qr/size of gold standard:\s+([0-9]+)[\s\,]/s ],
        [ R         => -3, qr/recall:\s+([0-9\.]+)\%/s ],
        [ P         => -2, qr/precision:\s+([0-9\.]+)\%/s ],
        [ F         => -1, qr/F:\s+([0-9\.]+)\%/s ],
        [ incorrect => -6, qr/links:\s+([0-9]+)\,/s ],
        [ partial   => -7, qr/links:\s+([0-9]+)\,/s ],
        [ correct   => -8, qr/links:\s+([0-9]+)\,/s ],
    );
    my %value;
    foreach my $field (@fields){
        my ($key, $index, $pattern) = @$field;
        my $line = $result[$index];    # undef if the section is too short
        next if not defined $line;
        $value{$key} = $1 if $line =~ $pattern;
    }
    return \%value;
}

# Print the summary: one header line (column titles, number of files given
# and the sort order) followed by one line per entry of %s.
#
# $sort selects the report:
#   mtime    P, R, F and the modification time, oldest file first
#   correct  correct/partial/incorrect link counts and N, most correct first
#   partial  the same columns, most partial links first
#   other    P, R, F and N, highest F first (the default)
# Values that were not found in a file are printed as 0.
{
    my $links_header  = " correct partial wrong (size) - ";
    my $links_format  = "%5d  %5d  %5d (%d) %s\n";
    my @links_columns = qw(correct partial incorrect N);

    my %layout_for = (
        mtime => {
            header     => "   P      R      F  (time) - ",
            sorted_by  => 'mtime',
            format     => "%3.2f  %3.2f  %3.2f (%d) %s\n",
            columns    => [qw(P R F mtime)],
            key        => 'mtime',
            descending => 0,
        },
        correct => {
            header     => $links_header,
            sorted_by  => 'correct links',
            format     => $links_format,
            columns    => [@links_columns],
            key        => 'correct',
            descending => 1,
        },
        partial => {
            header     => $links_header,
            sorted_by  => 'partial links',
            format     => $links_format,
            columns    => [@links_columns],
            key        => 'partial',
            descending => 1,
        },
        F => {
            # the last column of this report is N, so it is titled "(size)"
            header     => "   P      R      F   (size) - ",
            sorted_by  => 'F-scores',
            format     => "%3.2f  %3.2f  %3.2f  (%d) %s\n",
            columns    => [qw(P R F N)],
            key        => 'F',
            descending => 1,
        },
    );

    # unknown (or missing) sort keys fall back to the F-score report
    my $layout = $layout_for{F};
    if (defined $sort and exists $layout_for{$sort}){
        $layout = $layout_for{$sort};
    }

    print $layout->{header};
    print scalar @files;
    print " files found, sorted by ", $layout->{sorted_by}, "\n";

    my $key = $layout->{key};
    my @names;
    if ($layout->{descending}){
        @names = sort { $s{$b}{$key} <=> $s{$a}{$key} } keys %s;
    }
    else{
        @names = sort { $s{$a}{$key} <=> $s{$b}{$key} } keys %s;
    }

    my $format  = $layout->{format};
    my @columns = @{ $layout->{columns} };
    foreach my $name (@names){
        my @values = map { defined $_ ? $_ : 0 } @{ $s{$name} }{@columns};
        printf($format, @values, $name);
    }
}

exit;














# NOTE: an older copy of the summary code (directory scan, result parsing
# and an F-score listing without sort options) used to follow the
# unconditional `exit` above.  It could never be executed, re-declared
# @files and %s, and passed file names unquoted to the shell, so it has
# been removed.