The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/local/bin/perl -w
# (Updated: $Id: wps2sk.pl,v 1.8 2006/12/24 12:18:47 sidz1979 Exp $)
#
# wps2sk.pl version 0.09
#
# Program to convert SENSEVAL answer files with answers in the
# word#pos#sense format to mnemonics using the word#pos#sense -
# mnemonic mapping from WordNet.
#
# Copyright (c) 2005
#
# Ted Pedersen, University of Minnesota, Duluth
# tpederse@d.umn.edu
#
# Satanjeev Banerjee, Carnegie Mellon University
# banerjee+@cs.cmu.edu
#
# Siddharth Patwardhan, University of Utah
# sidd@cs.utah.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#-----------------------------------------------------------------------------

# we have to use commandline options, so use the necessary package!
use Getopt::Long;

# now get the options!
&GetOptions("quiet", "help", "wnpath=s", "version");
our ($opt_quiet, $opt_help, $opt_wnpath, $opt_version);

# If the version information has been requested
if (defined $opt_version)
{
    $opt_version = 1;
    &printVersion();
    exit;
}

# if help has been requested, print out help!
if (defined $opt_help)
{
    $opt_help = 1;
    &showHelp();
    exit;
}

# Check if path to WordNet Data files has been provided ... If so ... save it.
if (defined $opt_wnpath)
{
    $wnPCPath   = $opt_wnpath;
    $wnUnixPath = $opt_wnpath;
}
else
{
    $wnPCPath =
      (defined $ENV{"WNHOME"})
      ? $ENV{"WNHOME"}
      : "C:\\Program Files\\WordNet\\1.7";
    $wnUnixPath =
      (defined $ENV{"WNHOME"}) ? $ENV{"WNHOME"} : "/usr/local/wordnet1.7";
    $wnPCPath =
      (defined $ENV{"WNSEARCHDIR"})
      ? $ENV{"WNSEARCHDIR"}
      : $wnPCPath . "\\dict";
    $wnUnixPath =
      (defined $ENV{"WNSEARCHDIR"})
      ? $ENV{"WNSEARCHDIR"}
      : $wnUnixPath . "/dict";
}

# Open sense index file
open(SENSE,      $wnUnixPath . "/index.sense")
  || open(SENSE, $wnPCPath . "\\sense.idx")
  || die "Unable to open sense index file.\n";

# Load up the sense file in a hash. Keys of the hash will be
# lemma::offset - This is unique to each sense key. Values of the hash
# will be the actual sense keys.

print STDERR "Loading sense index file... " if (!defined $opt_quiet);

while (<SENSE>)
{
    s/[\r\f\n]//g;

    if ($_ !~ /((\S+)%\S+)\s+(\d+)/)
    {
        print STDERR "Error in sense index file. ";
        print STDERR "Read \"$_\". Expecting \"<sense-key> <offset> ...\"\n";
        exit;
    }

    my $key   = $2 . "::" . $3;
    my $value = $1;

    if (defined $senseHash{$key}) { print STDERR "$key already defined!\n"; }
    $senseHash{$key} = $value;
}

print STDERR "done!\n" if (!defined $opt_quiet);

# done with the index.sense file
close(SENSE);

# now do everything for each part of speech!
my $pos;
my %mapHash = ();
foreach $pos ("noun", "verb", "adj", "adv")
{
    print STDERR "Processing index.$pos file... " if (!defined $opt_quiet);

    # open index file
    open(INDEX,      $wnUnixPath . "/index.$pos")
      || open(INDEX, $wnPCPath . "\\$pos.idx")
      || die "Unable to open index file.\n";

    while (<INDEX>)
    {
        next if (/^ /);
        s/[\r\f\n]//g;

        # get the lemma and the number of senses
        my ($lemma, $p, $noOfSenses, @rest) = split /\s+/;

        # for each sense of this lemma, create the key (for the above
        # hash) and find the
        for ($i = $#rest - $noOfSenses + 1 ; $i <= $#rest ; $i++)
        {
            my $key = $lemma . "::" . $rest[$i];
            if (!defined $senseHash{$key})
            {
                print STDERR "$key not defined!!\n";
                exit;
            }
            my $tmpNum = ($i - ($#rest - $noOfSenses + 1) + 1);
            $mapHash{"$lemma\#$p\#$tmpNum"} = $senseHash{$key};
        }
    }
    close(INDEX);
    print STDERR "done!\n" if (!defined $opt_quiet);
}

while (<>)
{
    s/[\r\f\n]//g;

    # get the index part and the answer part
    my @answers = ();
    if (/^(\S+\s+\S+)\s+(.*)\s*/)
    {
        my $part1 = $1;
        @answers = split(/\s+/, $2);
        $part1 =~ s/\.[nvar]//;
        print "$part1 ";
    }

    # convert answers
    my $ans;
    foreach $ans (@answers)
    {
        if (defined $mapHash{$ans}) { print "$mapHash{$ans} "; }
        else { print STDERR "Couldnt find mapping for $ans\n"; }
    }
    print "\n";
}

# function to output help messages for this program
sub showHelp
{
    print
"Creates a word#pos#sense to sensekey mapping of a Senseval-2 answer file\n";
    print "(output by disamb.pl).\n";
    print
"Usage: wps2sk.pl [ [ --wnpath WNPATH] [FILE...] | --help | --version ]\n";
    print
      "--wnpath         WNPATH specifies the path of the WordNet data files.\n";
    print
"                 Ordinarily, this path is determined from the \$WNHOME\n";
    print
      "                 environment variable. But this option overides this\n";
    print "                 behavior.\n";
    print "--quiet          Run in quiet mode -- do not print informational\n";
    print "                 messages.\n";
    print "--help           Displays this help screen.\n";
    print "--version        Displays version information.\n\n";
}

# function to output "ask for help" message when the user's goofed up!
sub askHelp
{
    print "Type wps2sk.pl --help for help.\n";
}

# Subroutine to print the version information
sub printVersion
{
    print "wps2sk.pl version 0.09\n";
    print
"Copyright (c) 2005 Ted Pedersen, Satanjeev Banerjee, and Siddharth Patwardhan.\n";
}


__END__


=head1 NAME

wps2sk.pl - Tool that converts the output from disamb.pl to a Senseval-2 format (such that it can
be easily evaluated by the Senseval-2 evaluation software).

=head1 SYNOPSIS

wps2sk.pl [ [ --wnpath WNPATH] [FILE...] | --help | --version ]

=head1 DESCRIPTION

Creates a word#pos#sense to sensekey mapping of a Senseval-2 answer file (output by disamb.pl). It takes
the output of disamb.pl and converts it to a format that is understood by the Senseval-2 evaluation software.

=head1 OPTIONS

wps2sk.pl [ [ --wnpath WNPATH] [FILE...] | --help | --version ]

B<--wnpath>=I<WNPATH>         
    WNPATH specifies the path of the WordNet data files. Ordinarily, this path is determined from the $WNHOME
    environment variable. But this option overides this behavior.

B<--help>
    Displays this help screen.

B<--version>
    Displays version information.

=head1 AUTHORS

 Ted Pedersen, University of Minnesota, Duluth
 tpederse at d.umn.edu

 Siddharth Patwardhan, University of Utah, Salt Lake City
 sidd at cs.utah.edu

 Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
 banerjee+ at cs.cmu.edu

=head1 KNOWN BUGS

None.

=head1 SEE ALSO

I<perl>(1pm)

I<WordNet::SenseRelate::TargetWord>(3pm)

I<WordNet::Similarity>(3pm)

L<http://www.cogsci.princeton.edu/~wn>

L<http://senserelate.sourceforge.net>

L<http://groups.yahoo.com/group/senserelate>

=head1 COPYRIGHT

Copyright (c) 2005 Ted Pedersen, Siddharth Patwardhan, Satanjeev
Banerjee

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=cut