# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# WordNet::SenseRelate::Tools v0.09
# (Last updated $Id: Tools.pm,v 1.4 2006/12/24 12:18:45 sidz1979 Exp $)

package WordNet::SenseRelate::Tools;

use strict;
use warnings;
use Exporter;
use WordNet::QueryData;

our @ISA     = qw(Exporter);
our $VERSION = '0.09';

# Constructor for this module.
#
# Arguments (after the class name or invoking object):
#   $wn - optional. One of:
#           * a WordNet::QueryData object, or an object of any class derived
#             from it, which is stored and reused as-is;
#           * a non-empty plain string, which is handed unchanged to
#             WordNet::QueryData->new() (presumably the WordNet data
#             location -- confirm against the WordNet::QueryData docs);
#           * anything else (undef, "", an unrelated reference), in which
#             case WordNet::QueryData->new() is called with undef.
#
# Returns the new object, or undef if WordNet::QueryData->new() returned
# undef. The object holds the WordNet handle in {wn} and, in {compounds},
# a lookup hash keyed by every word containing an underscore.
sub new
{
    my $class = shift;
    my $wn    = shift;
    my $self  = {};

    # Allow both Class->new(...) and $object->new(...)
    $class = ref $class || $class;
    bless($self, $class);

    # Loaded locally so that this routine does not depend on a file-level
    # import; Scalar::Util is a core module.
    require Scalar::Util;

    # Accept instances of subclasses as well. Comparing ref($wn) against the
    # exact class name would discard a derived object and silently build a
    # second, default WordNet::QueryData in its place.
    my $isQueryData =
      Scalar::Util::blessed($wn) && $wn->isa('WordNet::QueryData');

    # Read in the wordnet data
    if (!$isQueryData)
    {
        # Only a non-empty plain string is forwarded to WordNet::QueryData
        my $wnpath = undef;
        $wnpath = $wn if (defined $wn && !ref($wn) && $wn ne "");
        $wn = WordNet::QueryData->new($wnpath);

        # Explicit undef (not a bare return) keeps the constructor's
        # existing single-value return shape for callers.
        return undef if (!defined $wn);
    }
    $self->{wn} = $wn;

    # Get the compounds from WordNet: the words of every part of speech
    # that contain at least one underscore
    foreach my $pos ('n', 'v', 'a', 'r')
    {
        foreach my $word ($wn->listAllWords($pos))
        {
            $self->{compounds}->{$word} = 1 if ($word =~ /_/);
        }
    }

    return $self;
}

# Rewrite a block of text so that runs of adjacent words forming a known
# compound are joined with underscores.
#
# Arguments:
#   $block - the text to process.
#
# Returns $block untouched when it is undefined, when this routine is not
# invoked on a reference, or when no compound list is stored in the object.
# Otherwise returns the words of the text (maximal runs of \w characters)
# separated by single spaces, with every detected compound joined by "_".
# At each position the longest candidate is tried first; a candidate spans
# at most eight words.
sub compoundify
{
    my ($self, $block) = @_;

    # Guard clauses: without text, an object or a compound list there is
    # nothing to do, and the input is handed back as received
    return $block unless (defined $block);
    return $block unless (ref $self);
    return $block unless (defined $self->{compounds});

    my $known = $self->{compounds};

    # Every maximal run of word characters is one token; anything else in
    # the text only acts as a separator
    my @words     = ($block =~ /(\w+)/g);
    my $lastIndex = $#words;

    my @pieces = ();
    my $start  = 0;
    while ($start <= $lastIndex)
    {
        # Right edge of the widest candidate starting at $start
        my $limit = $start + 7;
        $limit = $lastIndex if ($limit > $lastIndex);

        # Fall back to the single word unless a compound is found below
        my $piece = $words[$start];
        my $stop  = $start;

        # Try the longest candidate first and shrink it from the right
      CANDIDATE:
        foreach my $end (reverse(($start + 1) .. $limit))
        {
            my $candidate = join("_", @words[$start .. $end]);
            next CANDIDATE unless (defined $known->{$candidate});
            $piece = $candidate;
            $stop  = $end;
            last CANDIDATE;
        }

        push(@pieces, $piece);

        # Resume right after the last word that was consumed
        $start = $stop + 1;
    }

    return join(" ", @pieces);
}

1;

__END__

=head1 NAME

WordNet::SenseRelate::Tools - Perl module that provides certain common WordNet tools.

=head1 SYNOPSIS

  use WordNet::SenseRelate::Tools;

  $wn = WordNet::SenseRelate::Tools->new();

  $newtext = $wn->compoundify($text);

=head1 DESCRIPTION

WordNet::SenseRelate::Tools is a set of common WordNet tools required in programs. Currently,
this module only contains a compound detection method. Other methods will be added in the future.
Additionally, this module will, most likely, be distributed independently of this package.

=head2 EXPORT

None by default.

=head1 SEE ALSO

perl(1)

WordNet::QueryData(3)

=head1 AUTHOR

Ted Pedersen, tpederse at d.umn.edu

Siddharth Patwardhan, sidd at cs.utah.edu

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005 by Ted Pedersen and Siddharth Patwardhan

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.3 or,
at your option, any later version of Perl 5 you may have available.

=cut