The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/bin/perl -w
#
# rankFormat.pl version 2.05
# (Last updated $Id: rankFormat.pl,v 1.6 2008/05/30 23:12:43 sidz1979 Exp $)
#
# -----------------------------------------------------------------------------

use strict;
use Getopt::Long;

# Declarations!
# $opt_version and $opt_help are package globals because the legacy
# GetOptions("name", ...) calling style stores each parsed option in the
# variable $opt_<name> of the calling package.
our ($opt_version, $opt_help);

# Working storage for the input-reading and ranking code further down.
my (@parts, %hash);

# Get the options! GetOptions reports any invalid option on STDERR and
# returns false; in that case show the short usage text and stop with a
# non-zero exit status rather than carrying on regardless.
unless(GetOptions("version", "help"))
{
  # The usage routines print to the currently selected handle; select
  # STDERR so the message cannot end up in output piped to rank.pl.
  select(STDERR);
  minimalUsageNotes();
  exit 1;
}

# If the version information has been requested...
if(defined $opt_version)
{
  printVersion();
  exit;
}

# If detailed help has been requested...
if(defined $opt_help)
{
  printHelp();
  exit;
}
# Read "word1 word2 score" lines from the files named on the command line
# (or STDIN) and remember each pair's score rounded to two decimal places.
# A pair that occurs more than once keeps the score of its last occurrence.
while(my $line = <>)
{
  # split ' ' skips leading whitespace and splits on runs of whitespace,
  # so line terminators (CR, LF, FF) and padding never end up in a field.
  my @fields = split(' ', $line);

  # Blank lines are silently ignored.
  next unless(@fields);

  # A usable line needs two words and a score; anything shorter would only
  # yield "uninitialized value" warnings and a bogus zero-score entry.
  if(@fields < 3)
  {
    warn "rankFormat.pl: skipping malformed input line $. (expected: word1 word2 score)\n";
    next;
  }

  $hash{"$fields[0]<>$fields[1]<>"} = sprintf("%.2f", $fields[2]);
}

# First line of the output; this script always writes the constant 0 here.
print "0\n";

# Print the pairs from highest to lowest score. Pairs whose rounded scores
# are equal share the rank of the first pair in their group, and the next
# distinct score gets its 1-based position in the list (1, 1, 3, ...).
my $position = 0;   # 1-based position of the current pair in sorted order
my $tiedRank = 1;   # rank printed for the current group of equal scores
my $prevScore;      # score of the previously printed pair
foreach my $key (sort { $hash{$b} <=> $hash{$a} || $a cmp $b } keys %hash)
{
  # The secondary comparison on the key keeps tied pairs in a stable,
  # reproducible order instead of Perl's arbitrary hash order.
  $position++;
  $tiedRank = $position if(defined $prevScore && $hash{$key} != $prevScore);
  print "$key$tiedRank $hash{$key} 0 0 0\n";
  $prevScore = $hash{$key};
}

# ----------------- Subroutines Start Here ----------------------
# Subroutine to print detailed help.
# Prints the usage line (via printUsage) followed by a description of the
# program and its options. Takes no arguments; output goes to the currently
# selected output handle.
sub printHelp
{
  printUsage();
  print "\nProgram to rank the output of similarity.pl based on the semantic\n",
        "relatedness of the word pairs. The output is in a format that can be\n",
        "processed by the rank.pl program of the Text::NSP package. The rank.pl\n",
        "program takes two lists of ranked pairs of words and computes the\n",
        "correlation between them according to Spearman's correlation\n",
        "coefficient. The input can be specified in file(s) on the command line,\n",
        "or if no file(s) are specified, then STDIN is assumed. Output is written\n",
        "to STDOUT.\n\n",
        "Options: \n",
        "--help           Displays this help screen.\n",
        "--version        Displays version information.\n\n";
}

# Subroutine to print minimal usage notes: the usage line followed by a
# pointer to the --help option. Takes no arguments.
sub minimalUsageNotes
{
  printUsage();

  my $hint = "Type rankFormat.pl --help for detailed help.\n";
  print $hint;
}

# Subroutine that prints the one-line usage summary (command name and the
# arguments it accepts). Takes no arguments.
sub printUsage
{
  my $usage = "Usage: rankFormat.pl [FILE... |--help | --version]\n";
  print $usage;
}

# Subroutine to print the version information: the program name with its
# version number, then the copyright notice. Takes no arguments.
sub printVersion
{
  my $versionLine   = "rankFormat.pl version 2.05\n";
  my $copyrightLine = "Copyright (c) 2006-2008, Ted Pedersen and Siddharth Patwardhan.\n";
  print $versionLine . $copyrightLine;
}

__END__

=head1 NAME

rankFormat.pl - Rank the output of similarity.pl by semantic relatedness
and format it for the rank.pl program of Text::NSP.

=head1 SYNOPSIS

 rankFormat.pl [FILE... | --help | --version]

=head1 DESCRIPTION

Program to rank the output of similarity.pl based on the semantic
relatedness of the word pairs. The output is in a format that can be
processed by the rank.pl program of the L<Text::NSP> package. The rank.pl
program takes two lists of ranked pairs of words and computes the
correlation between them according to Spearman's correlation
coefficient. 

This can be used for comparing the output of two different measures
on the same set of word pairs, or for comparing the output of
similarity.pl with a human created gold standard (such as the
Miller and Charles 30 word pair data). 

=head1 OPTIONS

B<--help>

    Displays a detailed usage message

B<--version>

    Displays version information.

=head1 BUGS

Report to WordNet::Similarity mailing list :
 L<http://groups.yahoo.com/group/wn-similarity>

=head1 SEE ALSO

WordNet home page : 
 L<http://wordnet.princeton.edu>

WordNet::Similarity home page :
 L<http://wn-similarity.sourceforge.net>

More info about Text::NSP 
 L<http://ngram.sourceforge.net>

=head1 AUTHORS

 Ted Pedersen, University of Minnesota Duluth
 tpederse at d.umn.edu

 Siddharth Patwardhan, University of Utah, Salt Lake City
 sidd at cs.utah.edu

=head1 COPYRIGHT

Copyright (c) 2006-2008, Ted Pedersen and Siddharth Patwardhan

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=cut