The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Lingua::JA::WordNet;

use 5.008_008;
use strict;
use warnings;

use DBI;
use Carp ();
use File::ShareDir  ();
use List::MoreUtils ();

our $VERSION = '0.21';

my $DB_FILE = 'wnjpn-1.1_and_synonyms-1.0.db';


sub _options
{
    return {
        data        => File::ShareDir::dist_file('Lingua-JA-WordNet', $DB_FILE),
        enable_utf8 => 0,
        verbose     => 0,
    };
}

sub new
{
    my $class = shift;

    my $options = $class->_options;

    if (scalar @_ == 1) { $options->{data} = shift; }
    else
    {
        my %args = @_;

        for my $key (keys %args)
        {
            if ( ! exists $options->{$key} ) { Carp::croak "Unknown option: '$key'"; }
            else                             { $options->{$key} = $args{$key};       }
        }
    }

    Carp::croak 'WordNet data file is not found' unless -f $options->{data};

    my $dbh = DBI->connect("dbi:SQLite:dbname=$options->{data}", '', '', {
        #Warn           => 0, # get rid of annoying disconnect message
        # The Warn attribute enables useful warnings for certain bad practices.
        # It is enabled by default and should only be disabled in rare circumstances.
        # (see http://search.cpan.org/dist/DBI/DBI.pm#Warn)

        RaiseError     => 1,
        PrintError     => 0,
        AutoCommit     => 0,
        sqlite_unicode => $options->{enable_utf8},
    });

    bless { dbh => $dbh, verbose => $options->{verbose} }, $class;
}

sub DESTROY { shift->{dbh}->disconnect; }

sub Word
{
    my ($self, $synset, $lang) = @_;

    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT lemma FROM word JOIN sense ON word.wordid = sense.wordid
              WHERE synset     = ?
                AND sense.lang = ?'
        );

    $sth->execute($synset, $lang);

    my @words = map { $_->[0] =~ tr/_/ /; $_->[0]; } @{$sth->fetchall_arrayref};

    Carp::carp "Word: there are no words for $synset in $lang" if $self->{verbose} && ! scalar @words;

    return @words;
}

sub Synset
{
    my ($self, $word, $lang) = @_;

    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT synset FROM word LEFT JOIN sense ON word.wordid = sense.wordid
              WHERE lemma      = ?
                AND sense.lang = ?'
        );

    $sth->execute($word, $lang);

    my @synsets = map {$_->[0]} @{$sth->fetchall_arrayref};

    Carp::carp "Synset: there are no synsets for '$word' in $lang" if $self->{verbose} && ! scalar @synsets;

    return @synsets;
}

sub SynPos
{
    my ($self, $word, $pos, $lang) = @_;

    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT synset FROM word LEFT JOIN sense ON word.wordid = sense.wordid
              WHERE lemma      = ?
                AND word.pos   = ?
                AND sense.lang = ?'
        );

    $sth->execute($word, $pos, $lang);

    my @synsets = map {$_->[0]} @{$sth->fetchall_arrayref};

    Carp::carp "SynPos: there are no synsets for '$word' corresponding to '$pos' and '$lang'" if $self->{verbose} && ! scalar @synsets;

    return @synsets;
}

sub Pos
{
    my ($self, $synset) = @_;
    return $1 if $synset =~ /^[0-9]{8}-([arnv])$/;
    Carp::carp "Pos: '$synset' is wrong synset format" if $self->{verbose};
    return;
}

sub Rel
{
    my ($self, $synset, $rel) = @_;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT synset2 FROM synlink
              WHERE synset1 = ?
                AND link    = ?'
        );

    $sth->execute($synset, $rel);

    my @synsets = map {$_->[0]} @{$sth->fetchall_arrayref};

    Carp::carp "Rel: there are no $rel links for $synset" if $self->{verbose} && ! scalar @synsets;

    return @synsets;
}

sub Def
{
    my ($self, $synset, $lang) = @_;

    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT sid, def FROM synset_def
              WHERE synset = ?
                AND lang   = ?'
        );

    $sth->execute($synset, $lang);

    my @defs;

    while (my $row = $sth->fetchrow_arrayref)
    {
        my ($sid, $def) = @{$row};
        $defs[$sid] = $def;
    }

    Carp::carp "Def: there are no definition sentences for $synset in $lang" if $self->{verbose} && ! scalar @defs;

    return @defs;
}

sub Ex
{
    my ($self, $synset, $lang) = @_;

    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT sid, def FROM synset_ex
              WHERE synset = ?
                AND lang   = ?'
        );

    $sth->execute($synset, $lang);

    my @exs;

    while (my $row = $sth->fetchrow_arrayref)
    {
        my ($sid, $ex) = @{$row};
        $exs[$sid] = $ex;
    }

    Carp::carp "Ex: there are no example sentences for $synset in $lang" if $self->{verbose} && ! scalar @exs;

    return @exs;
}

sub AllSynsets
{
    my $self = shift;
    my $sth = $self->{dbh}->prepare('SELECT synset FROM synset');
    $sth->execute;
    my @synsets = map {$_->[0]} @{$sth->fetchall_arrayref};
    return \@synsets;
}

sub WordID
{
    my ($self, $word, $pos, $lang) = @_;

    $word =~ tr/ /_/;
    $lang = 'jpn' unless defined $lang;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT wordid FROM word
              WHERE lemma = ?
                AND pos   = ?
                AND lang  = ?'
        );

    $sth->execute($word, $pos, $lang);

    my ($wordid) = map {$_->[0]} @{$sth->fetchall_arrayref};

    Carp::carp "WordID: there is no WordID for '$word' corresponding to '$pos' and '$lang'" if $self->{verbose} && ! defined $wordid;

    return $wordid;
}

sub Synonym
{
    my ($self, $wordid) = @_;

    my $sth
        = $self->{dbh}->prepare
        (
            'SELECT lemma FROM word JOIN wordlink ON word.wordid = wordlink.wordid2
              WHERE wordid1 = ?
                AND link    = ?'
        );

    $sth->execute($wordid, 'syns');
    my @synonyms = map {$_->[0]} @{$sth->fetchall_arrayref};

    Carp::carp "Synonyms: there are no Synonyms for $wordid" if $self->{verbose} && ! scalar @synonyms;

    # 一応順番を保持したいのでハッシュスライスは使わない
    # uniq: The order of elements in the returned list is the same as in LIST.
    return List::MoreUtils::uniq @synonyms;
}

1;

__END__

=encoding utf8

=head1 NAME

Lingua::JA::WordNet - Perl OO interface to Japanese WordNet database

=for test_synopsis
my ($db_path, %config, $synset, $lang, $pos, $rel);

=head1 SYNOPSIS

  use Lingua::JA::WordNet;

  my $wn = Lingua::JA::WordNet->new;
  my @synsets = $wn->Synset('相撲');
  my @hypes   = $wn->Rel($synsets[0], 'hype');
  my @words   = $wn->Word($hypes[0]);

  print "$words[0]\n";
  # -> レスリング

  # Synonym method can access to Japanese WordNet Synonyms Database.
  my $wordID   = $wn->WordID('ねんねこ', 'n');
  my @synonyms = $wn->Synonym($wordID);

  print "@synonyms\n";
  # -> お休み ねね スリープ 就眠 御休み 眠り 睡り 睡眠

=head1 DESCRIPTION

Japanese WordNet is a semantic dictionary of Japanese.
Lingua::JA::WordNet is yet another Perl module to look up
entries in Japanese WordNet.

The original Perl module is WordNet::Multi.
WordNet::Multi is awkward to use and no longer maintained.
Because of this, I uploaded this module.

=head1 METHODS

=head2 $wn = new($db_path) or new(%config)

Creates a new Lingua::JA::WordNet instance.

  my $wn = Lingua::JA::WordNet->new(
      data        => $db_path, # default is File::ShareDir::dist_file('Lingua-JA-WordNet', 'wnjpn-1.1_and_synonyms-1.0.db')
      enable_utf8 => 1,        # default is 0 (see sqlite_unicode attribute of DBD::SQLite)
      verbose     => 0,        # default is 0 (all warnings are ignored)
  );

The data must be Japanese WordNet and English WordNet in an SQLite3 database.


=head2 @words = $wn->Word( $synset [, $lang] )

Returns the words corresponding to $synset and $lang.

=head2 @synsets = $wn->Synset( $word [, $lang] )

Returns the synsets corresponding to $word and $lang.

=head2 @synsets = $wn->SynPos( $word, $pos [, $lang] )

Returns the synsets corresponding to $word, $pos and $lang.

=head2 $pos = $wn->Pos($synset)

Returns the part of speech of $synset.

=head2 @synsets = $wn->Rel($synset, $rel)

Returns the relational synsets corresponding to $synset and $rel.

=head2 @defs = $wn->Def( $synset [, $lang] )

Returns the definition sentences corresponding to $synset and $lang.

=head2 @exs = $wn->Ex( $synset [, $lang] )

Returns the example sentences corresponding to $synset and $lang,

=head2 $allsynsets_arrayref = $wn->AllSynsets()

Returns all synsets.

=head2 $wordID = $wn->WordID( $word, $pos [, $lang] )

Returns the word ID corresponding to $word, $pos and $lang.

=head2 @synonyms = $wn->Synonym($wordID)

Returns the synonyms of $wordID.

This method works only under the bundled Japanese WordNet database file.


=head2 LANGUAGES

$lang can take 'jpn' or 'eng'. The default value is 'jpn'.


=head2 PARTS OF SPEECH

$pos can take the left side values of the following table.

  a|adjective
  r|adverb
  n|noun
  v|verb
  a|形容詞
  r|副詞
  n|名詞
  v|動詞

This is the result of the SQL query 'SELECT pos, def FROM pos_def'.


=head2 RELATIONS

$rel can take the left side values of the following table.

  also|See also
  syns|Synonyms
  hype|Hypernyms
  inst|Instances
  hypo|Hyponym
  hasi|Has Instance
  mero|Meronyms
  mmem|Meronyms --- Member
  msub|Meronyms --- Substance
  mprt|Meronyms --- Part
  holo|Holonyms
  hmem|Holonyms --- Member
  hsub|Holonyms --- Substance
  hprt|Holonyms -- Part
  attr|Attributes
  sim|Similar to
  enta|Entails
  caus|Causes
  dmnc|Domain --- Category
  dmnu|Domain --- Usage
  dmnr|Domain --- Region
  dmtc|In Domain --- Category
  dmtu|In Domain --- Usage
  dmtr|In Domain --- Region
  ants|Antonyms

This is the result of the SQL query 'SELECT link, def FROM link_def'.


=head1 Out of memory!

In rare cases, this error message is displayed during the installation of this library.
If this is displayed, please install this library manually. (RT#82276)


=head1 AUTHOR

pawa E<lt>pawapawa@cpan.orgE<gt>

=head1 SEE ALSO

Japanese WordNet: L<http://nlpwww.nict.go.jp/wn-ja/>

L<http://twitter.com/LinguaJAWordNet>

=head1 LICENSE

This library except the bundled WordNet database file is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

The bundled WordNet database file complies with the following licenses:

=over 4

=item * For Japanese data: L<http://nlpwww.nict.go.jp/wn-ja/license.txt>

=item * For English data: L<http://wordnet.princeton.edu/wordnet/license/>

=back


=cut