The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -s
use Data::Dumper;
use Lingua::NATools::Lexicon;

use warnings;
use strict;

# $h is populated by perl's -s switch (see the shebang line) when the
# script is invoked with -h.
our ($h);

# Explicit help request: usage goes to STDOUT and the exit status is 0.
usage() if $h;

# The lexicon file is mandatory. Its absence is an error, so usage goes to
# STDERR with a non-zero exit status. Checking definedness and length
# (instead of plain truth) keeps a file literally named "0" usable.
my $filename = shift;
usage(1) unless defined $filename && length $filename;

# Guard in case the constructor signals failure with a false value; without
# it the failure would only show up as an obscure method-call error below.
my $lex = Lingua::NATools::Lexicon->new($filename)
    or die "nat-lex2perl: cannot open lexicon file '$filename'\n";

my $size = $lex->size;

binmode STDOUT, ":utf8";
print "use utf8;\n";
print "{\n";

# Emit one `"word" => count,` line for every identifier from 1 to $size.
for my $id (1 .. $size) {
    my $word  = $lex->word_from_id($id);
    my $count = $lex->id_count($id);

    # The count is passed as an argument rather than interpolated into the
    # format, so its content can never be misread as a printf directive.
    printf "\t\"%s\" => %s,\n", quotemeta($word), $count;
}
print "}\n";

$lex->close;


# usage([$status])
#
# Prints a short usage message and terminates the program.
#
#   $status - optional exit status (defaults to 0). A true value marks the
#             call as an error report: the message is then written to
#             STDERR instead of STDOUT.
#
# Never returns.
sub usage {
    my ($status) = @_;
    $status = 0 unless defined $status;

    my $out = $status ? \*STDERR : \*STDOUT;

    print {$out} "nat-lex2perl: dumps a lexicon file as a Perl hash.\n\n";
    print {$out} "\tnat-lex2perl <file.lex>\n\n";
    print {$out} "For more help, please run 'perldoc nat-lex2perl'\n";
    exit($status);
}

__END__

=encoding UTF-8

=head1 NAME

nat-lex2perl - dumps a lexicon file as a Perl hash.

=head1 SYNOPSIS

   nat-lex2perl <file.lex>

=head1 DESCRIPTION

This tool is used mainly for debugging lexicon files (C<file.lex>
files). Pass one as a parameter and it will print the lexicon
information to standard output as a Perl hash, in a Data::Dumper-like
format, mapping each word to its count.

=head1 SEE ALSO

NATools documentation, perl(1)

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2012 by Alberto Manuel Brandão Simões

=cut