# WordNet::get_wn_info.pm version 2.04
# (Last updated $Id: get_wn_info.pm,v 1.1 2008/03/27 05:13:01 sidz1979 Exp $)
#
# Package used by WordNet::Similarity::lesk module that
# computes semantic relatedness of word senses in WordNet
# using gloss overlaps.
#
# Copyright (c) 2005,
#
# Ted Pedersen, University of Minnesota, Duluth
# tpederse at d.umn.edu
#
# Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
# banerjee+ at cs.cmu.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to
#
# The Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# ------------------------------------------------------------------
package WordNet::get_wn_info;
=head1 NAME
WordNet::get_wn_info - Provides access to glosses related to a concept in WordNet
=head1 SYNOPSIS
use WordNet::get_wn_info;
my $gwn = WordNet::get_wn_info->new($wn, 0);
my $hypHash = $gwn->hype($synsHash, 0);
my $holHash = $gwn->holo($synsHash, 0);
=head1 DESCRIPTION
This module provides a layer between the user and the WordNet::QueryData
module, by providing easy access to glosses and synsets related to
particular concpets in WordNet.
=head2 Methods
=over
=cut
use WordNet::stem;
use strict;
use Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
%EXPORT_TAGS = ();
@EXPORT_OK = ();
@EXPORT = ();
$VERSION = '2.04';
=item new
Creates a new WordNet::get_wn_info object.
Parameters: $wn, $stemFlag
Returns: $gwn
=cut
# function to set up the wordnet object and the various boundary indices
sub new
{
my $className;
my $self = {};
my $wn;
my $stemmingReqd;
my $stemmer;
# get the class name
$className = shift;
# get wordnet object
$wn = shift;
$self->{'wn'} = $wn;
# check WordNet::QueryData version
$wn->VERSION(1.39);
# check if stemming called for
$stemmingReqd = shift;
$self->{'stem'} = $stemmingReqd;
if($stemmingReqd)
{
$stemmer = WordNet::stem->new($wn);
$self->{'stemmer'} = $stemmer;
}
# set up various boundaries.
$self->{'glosBoundaryIndex'} = 0;
$self->{'exampleBoundaryIndex'} = 0;
$self->{'synonymBoundaryIndex'} = 0;
bless($self, $className);
return $self;
}
=item hype
Returns the hypernyms of a synset.
Parameters: $synsHash, $ipType
Returns: $hypeHash
=cut
# NOTE: Thanks to Wybo Wiersma for contributing optimizations
# in the following code.
# function to take a set of synsets and to return their
# hypernyms. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub hype
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
# check if this is a request for the input-output types of this
# function
return(1, 1) if(defined($outprep));
my %newsynsetsh;
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Return error code instead of "exit"
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the hypernyms
my @hypernyms = $wn->querySense($syns, "hypes");
# put the hypernyms in a hash. this way we will avoid multiple
# copies of the same hypernym
my $temp;
foreach $temp (@hypernyms)
{
$newsynsetsh{$temp} = 1;
}
}
# return the hypernyms in an hash ref
return(\%newsynsetsh);
}
=item hypo
Returns the hyponyms of a synset.
Parameters: $synsHash, $ipType
Returns: $hypoHash
=cut
# function to take a set of synsets and to return their
# hyponyms. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub hypo
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
# check if this is a request for the input-output types of this
# function
return(1, 1) if(defined($outprep));
my %hyponymHash;
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the hyponyms
my @hyponyms = $wn->querySense($syns, "hypos");
# put the hyponyms in a hash. this way we will avoid multiple
# copies of the same hyponym
my $temp;
foreach $temp (@hyponyms)
{
$hyponymHash{$temp} = 1;
}
}
# return the hyponyms in an hash ref
return(\%hyponymHash);
}
=item holo
Returns the holonyms of a synset.
Parameters: $synsHash, $ipType
Returns: $holoHash
=cut
# function to take a set of synsets and to return their
# holonyms. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub holo
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %holonymHash = ();
# check if this is a request for the input-output types of this
# function
return(1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the holonyms
my @holonyms = $wn->querySense($syns, "holo");
# put the holonyms in a hash. this way we will avoid multiple
# copies of the same holonym
my $temp;
foreach $temp (@holonyms)
{
$holonymHash{$temp} = 1;
}
}
# return the holonyms in an hash ref
return(\%holonymHash);
}
=item mero
Returns the meronyms of a synset.
Parameters: $synsHash, $ipType
Returns: $meroHash
=cut
# function to take a set of synsets and to return their
# meronyms. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub mero
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %meronymHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the meronyms
my @meronyms = $wn->querySense($syns, "mero");
# put the meronyms in a hash. this way we will avoid multiple
# copies of the same meronym
my $temp;
foreach $temp (@meronyms)
{
$meronymHash{$temp} = 1;
}
}
# return the meronyms in an hash ref
return(\%meronymHash);
}
=item attr
Returns the attributes of a synset.
Parameters: $synsHash, $ipType
Returns: $attrHash
=cut
# function to take a set of synsets and to return their
# attributes. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub attr
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %attrHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the attrs
my @attrs = $wn->querySense($syns, "attr");
# put the attrs in a hash. this way we will avoid multiple
# copies of the same attr
my $temp;
foreach $temp (@attrs)
{
$attrHash{$temp} = 1;
}
}
# return the attrs in an hash ref
return(\%attrHash);
}
=item also
Returns the also-see of a synset.
Parameters: $synsHash, $ipType
Returns: $alsoHash
=cut
# function to take a set of synsets and to return their also-see
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub also
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %alsoSeeHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the also see synsets
my @alsoSees = $wn->queryWord($syns, "also");
# put the synsets in a hash. this way we will avoid multiple
# copies of the same synset
my $temp;
foreach $temp (@alsoSees)
{
$alsoSeeHash{$temp} = 1;
}
}
# return the synsets in an hash ref
return(\%alsoSeeHash);
}
=item deri
Returns the derived forms of a synset.
Parameters: $synsHash, $ipType
Returns: $deriHash
=cut
# function to take a set of words and to return their derived forms.
# both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub deri
{
my $self = shift;
my $wn = $self->{wn};
my ($wordsh, $outprep) = @_;
my %deriHash = ();
return (1, 1) if(defined($outprep));
foreach my $word (keys %{$wordsh})
{
# TODO: Replace error message and exit with return error code.
if($word !~ m/\#\w+\#\d+/)
{
print STDERR "$word is not in WORD#POS#SENSE format!\n";
exit 1;
}
my @deris = $wn->queryWord($word, "deri");
foreach my $temp (@deris)
{
$deriHash{$temp} = 1;
}
}
return(\%deriHash);
}
=item domn
Returns the domains of a synset.
Parameters: $synsHash, $ipType
Returns: $domnHash
=cut
# function to take a set of synsets and to return their domain
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub domn
{
my $self = shift;
my $wn = $self->{wn};
my ($wordsh, $outprep) = @_;
my %domnHash = ();
return(1, 1) if(defined($outprep));
foreach my $word (keys %{$wordsh})
{
# TODO: Replace error message and exit with return error code.
if($word !~ m/\#\w+\#\d+/)
{
print STDERR "$word is not in WORD#POS#SENSE format!\n";
exit 1;
}
my @domns = $wn->queryWord($word, "domn");
foreach my $temp (@domns)
{
$domnHash{$temp} = 1;
}
}
return (\%domnHash);
}
=item domt
Returns the domain terms of a synset.
Parameters: $synsHash, $ipType
Returns: $domtHash
=cut
# function to take a set of synsets and to return their domain term
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub domt
{
my $self = shift;
my $wn = $self->{wn};
my ($wordsh, $outprep) = @_;
my %domtHash = ();
return (1, 1) if(defined($outprep));
foreach my $word (keys %{$wordsh})
{
# TODO: Replace error message and exit with return error code.
if($word != m/\#\w+\#\d+/)
{
print STDERR "$word is not in WORD#POS#SENSE format!\n";
exit 1;
}
my @domts = $wn->queryWord ($word, "domt");
foreach my $temp (@domts)
{
$domtHash{$temp} = 1;
}
}
return (\%domtHash);
}
=item sim
Returns the similar-to synsets.
Parameters: $synsHash, $ipType
Returns: $simHash
=cut
# function to take a set of synsets and to return their similar-to
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub sim
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %simHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the sim synsets
my @sims = $wn->querySense($syns, "sim");
# put the synsets in a hash. this way we will avoid multiple
# copies of the same synset
my $temp;
foreach $temp (@sims)
{
$simHash{$temp} = 1;
}
}
# return the synsets in an hash ref
return(\%simHash);
}
=item enta
Returns the entailment of a synset.
Parameters: $synsHash, $ipType
Returns: $entaHash
=cut
# function to take a set of synsets and to return their entailment
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub enta
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %entailsHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the entails synsets
my @entails = $wn->querySense($syns, "enta");
# put the entails synsets in a hash. this way we will avoid
# multiple copies of the same entails synset
my $temp;
foreach $temp (@entails)
{
$entailsHash{$temp} = 1;
}
}
# return the causs in an hash ref
return(\%entailsHash);
}
=item caus
Returns the cause of a synset.
Parameters: $synsHash, $ipType
Returns: $causHash
=cut
# function to take a set of synsets and to return their cause
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub caus
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %causeHash = ();
# check if this is a request for the input-output types of this
# function
return(1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the cause synsets
my @cause = $wn->querySense($syns, "caus");
# put the cause synsets in a hash. this way we will avoid
# multiple copies of the same cause synset
my $temp;
foreach $temp (@cause)
{
$causeHash{$temp} = 1;
}
}
# return the causs in an hash ref
return(\%causeHash);
}
=item part
Returns the participles of a synset.
Parameters: $synsHash, $ipType
Returns: $partHash
=cut
# function to take a set of synsets and to return their participle
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub part
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %partHash = ();
# check if this is a request for the input-output types of this
# function
return(1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the part synsets
my @part = $wn->queryWord($syns, "part");
# put the part synsets in a hash. this way we will avoid
# multiple copies of the same part synset
my $temp;
foreach $temp (@part)
{
$partHash{$temp} = 1;
}
}
# return the causs in an hash ref
return(\%partHash);
}
=item pert
Returns the pertainyms of a synset.
Parameters: $synsHash, $ipType
Returns: $pertHash
=cut
# function to take a set of synsets and to return their pertainym
# synsets. both input and output will be arrays of fully qualified
# WordNet senses (in WORD#POS#SENSE format).
sub pert
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my %pertHash = ();
# check if this is a request for the input-output types of this
# function
return (1, 1) if(defined($outprep));
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the pert synsets
my @pert = $wn->queryWord($syns, "pert");
# put the pert synsets in a hash. this way we will avoid
# multiple copies of the same pert synset
my $temp;
foreach $temp (@pert)
{
$pertHash{$temp} = 1;
}
}
# return the causs in an hash ref
return(\%pertHash);
}
=item glos
Returns the gloss of a synset.
Parameters: $synsHash, $ipType
Returns: $glossString
=cut
# function to take a set of synsets and to return the concatenation of
# their glosses
sub glos
{
my $self = shift;
my $wn = $self->{'wn'};
my $stemmer = $self->{'stemmer'};
my ($synsetsh, $outprep) = @_;
my $returnString = "";
# check if this is a request for the input-output types of this
# function
return (1, 2) if(defined($outprep));
my @synshkeys = keys %{$synsetsh};
my $i = 0;
foreach my $syns (@synshkeys)
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the glos
my $glosString;
($glosString) = $wn->querySense($syns, "glos");
# regularize the glos
$glosString =~ s/\".*//;
# get rid of most punctuation
$glosString =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /;
# get rid of apostrophes not surrounded by word chars
$glosString =~ s/(?<!\w)\x27/ /g;
$glosString =~ s/\x27(?!\w)/ /g;
# remove dashes, but not hyphens
$glosString =~ s/--/ /g;
# this causes "plane's" to become "plane s"
# $glosString =~ s/[^\w]/ /g;
$glosString =~ s/\s+/ /g;
$glosString = lc $glosString;
# stem the glos if asked for
$glosString = $stemmer->stemString($glosString, 1) if($self->{stem});
$glosString =~ s/^\s*/ /;
$glosString =~ s/\s*$/ /;
# append to return string
$returnString .= $glosString;
# put in boundary if more glosses coming!
if($i < $#synshkeys)
{
my $boundary = sprintf("GGG%05dGGG", $self->{'glosBoundaryIndex'});
$returnString .= $boundary;
($self->{'glosBoundaryIndex'})++;
}
$i++;
}
# and we are done!
return($returnString);
}
=item example
Returns the example of a synset.
Parameters: $synsHash, $ipType
Returns: $example
=cut
# function to take a set of synsets and to return the concatenation of
# their example strings
sub example
{
my $self = shift;
my $wn = $self->{'wn'};
my $stemmer = $self->{'stemmer'};
my ($synsetsh, $outprep) = @_;
my @exampleStrings = ();
# check if this is a request for the input-output types of this
# function
return (1, 2) if(defined($outprep));
# first get all the example strings into an array
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the glos
my $exampleString;
($exampleString) = $wn->querySense($syns, "glos");
# check if this has any examples
if($exampleString !~ /\"/) {next;}
while($exampleString =~ /\"([^\"]*)\"/g)
{
push @exampleStrings, $1;
}
}
# now put the example strings together to form the return
# string. separate examples with the example boundary
my $returnString = "";
my $i;
for ($i = 0; $i <= $#exampleStrings; $i++)
{
# preprocess
###
# get rid of most punctuation
$exampleStrings[$i] =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /;
# get rid of apostrophes not surrounded by word chars
$exampleStrings[$i] =~ s/(?<!\w)\x27/ /g;
$exampleStrings[$i] =~ s/\x27(?!\w)/ /g;
# remove dashes, but not hyphens
$exampleStrings[$i] =~ s/--/ /g;
###$exampleStrings[$i] =~ s/[^\w]/ /g;
$exampleStrings[$i] =~ s/\s+/ /g;
$exampleStrings[$i] =~ s/^\s*/ /;
$exampleStrings[$i] =~ s/\s*$/ /;
$exampleStrings[$i] = lc($exampleStrings[$i]);
# stem if so required
$exampleStrings[$i] = $stemmer->stemString($exampleStrings[$i], 1)
if($self->{'stem'});
$exampleStrings[$i] =~ s/^\s*/ /;
$exampleStrings[$i] =~ s/\s*$/ /;
# append to $returnString
$returnString .= $exampleStrings[$i];
# put in boundary if more examples coming!
if($i < $#exampleStrings)
{
my $boundary = sprintf("EEE%05dEEE", $self->{'exampleBoundaryIndex'});
$returnString .= $boundary;
($self->{'exampleBoundaryIndex'})++;
}
}
# and we are done!
return($returnString);
}
=item syns
Returns the words in the synset.
Parameters: $synsHash, $ipType
Returns: $wordString
=cut
# function to take a set of synsets and to return the concatenation of
# all the words in them. repeated words are returned only once.
sub syns
{
my $self = shift;
my $wn = $self->{'wn'};
my ($synsetsh, $outprep) = @_;
my $returnString = "";
# check if this is a request for the input-output types of this
# function
return (1, 2) if(defined($outprep));
my %synonymHash = ();
foreach my $syns (keys %{$synsetsh})
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($syns !~ /\#\w\#\d+/)
{
print STDERR "$syns is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the words
my @synsetWords = $wn->querySense($syns, "syns");
# for each word, remove the POS and SENSE, and put only the
# word in a hash
my $temp;
foreach $temp (@synsetWords)
{
$temp =~ s/\#.*//;
$synonymHash{$temp} = 1;
}
}
# now get hold of all the words in sorted order
my @synonymArray = sort keys %synonymHash;
# concatenate them, using the synonym boundary
for(my $i = 0; $i <= $#synonymArray; $i++)
{
$synonymArray[$i] =~ s/ /_/g;
$returnString .= " $synonymArray[$i] ";
# put in boundary if more examples coming!
if($i < $#synonymArray)
{
my $boundary = sprintf("SSS%05dSSS", $self->{synonymBoundaryIndex});
$returnString .= $boundary;
($self->{synonymBoundaryIndex})++;
}
}
# and we are done!
return($returnString);
}
=item glosexample
Returns the gloss and example of a synset.
Parameters: $synsHash, $ipType
Returns: $glosExampleString
=back
=cut
# function to take a set of synsets and to return the concatenation of
# their glosses (including the examples)
sub glosexample
{
my $self = shift;
my $wn = $self->{wn};
my $stemmer = $self->{stemmer};
my ($synsetsh, $outprep) = @_;
my $returnString = "";
# check if this is a request for the input-output types of this
# function
return (1, 2) if(defined($outprep));
my @synshkeys = keys %{$synsetsh};
for(my $i = 0; $i < scalar(@synshkeys); $i++)
{
# check if in word-pos-sense format
# TODO: Replace error message and exit with return error code.
if($synshkeys[$i] !~ /\#\w\#\d+/)
{
print STDERR "$synshkeys[$i] is not in WORD\#POS\#SENSE format!\n";
exit;
}
# get the glos
my $glosString;
($glosString) = $wn->querySense($synshkeys[$i], "glos");
# regularize the glos
###$glosString =~ s/\'//g;
###$glosString =~ s/[^\w]/ /g;
# get rid of most punctuation
$glosString =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /;
# get rid of apostrophes not surrounded by word chars
$glosString =~ s/(?<!\w)\x27/ /g;
$glosString =~ s/\x27(?!\w)/ /g;
# remove dashes, but not hyphens
$glosString =~ s/--/ /g;
###
$glosString =~ s/\s+/ /g;
$glosString = lc $glosString;
# stem the glos if asked for
$glosString = $stemmer->stemString($glosString, 1) if($self->{stem});
$glosString =~ s/^\s*/ /;
$glosString =~ s/\s*$/ /;
# append to return string
$returnString .= $glosString;
# put in boundary if more glosses coming!
if($i < $#synshkeys)
{
my $boundary = sprintf("XXX%05dXXX", $self->{glosBoundaryIndex});
$returnString .= $boundary;
($self->{glosBoundaryIndex})++;
}
}
# and we are done!
return($returnString);
}
1;
__END__
=head1 SEE ALSO
perl(1), WordNet::Similarity(3), WordNet::QueryData(3)
http://www.cs.utah.edu/~sidd
http://wordnet.princeton.edu
http://www.ai.mit.edu/~jrennie/WordNet
http://groups.yahoo.com/group/wn-similarity
=head1 AUTHORS
Ted Pedersen, University of Minnesota Duluth
tpederse at d.umn.edu
Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
banerjee+ at cs.cmu.edu
=head1 BUGS
None.
To report bugs, go to http://groups.yahoo.com/group/wn-similarity/ or
e-mail "S<tpederse at d.umn.edu>".
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2005, Ted Pedersen and Satanjeev Banerjee
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to
The Free Software Foundation, Inc.,
59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
Note: a copy of the GNU General Public License is available on the web
at L<http://www.gnu.org/licenses/gpl.txt> and is included in this
distribution as GPL.txt.
=cut