The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -s

use strict;
use warnings;

use DBI;

# $h is populated by perl's -s switch (see the shebang line) when the
# script is invoked with -h.
our $h;

# Print a short usage pointer on STDERR and stop.
if ($h) {
    print STDERR "nat-ngramsIdx: Indexes a ngrams SQLite database\n",
                 "\n",
                 "For more help, please run 'perldoc nat-ngramsIdx'\n";
    exit;
}

# Path of the ngrams SQLite file to index (first command-line argument).
my $file = shift;

die "Can't find that file\n" if (!defined $file || !-f $file);

# Work out the n-gram order (2, 3 or 4) from the file name before opening
# any database handle.  The documented naming scheme is "<name>.<N>.ngrams",
# so that suffix is trusted first: testing for a digit anywhere in the path
# is fooled by digits in directory or corpus names (for instance
# "corpus4/source.2.ngrams" would be taken for a tetragrams file).
my $size = -1;
if ($file =~ m{\.([234])\.ngrams\z}) {
    $size = $1;
}
else {
    # Historical heuristic, kept for unconventional file names: the
    # highest of 2, 3, 4 that appears anywhere in the path wins.
    $size = 2 if $file =~ m!2!;
    $size = 3 if $file =~ m!3!;
    $size = 4 if $file =~ m!4!;
}
die "Strange ngrams filename\n" unless $size > 1;

# The new database is built in "<file>_".  RaiseError turns every later
# DBI failure into a fatal error, so the script cannot carry on after a
# failed statement; PrintError is disabled to avoid reporting it twice.
my $dbh = DBI->connect("dbi:SQLite:dbname=${file}_", "", "",
                       { RaiseError => 1, PrintError => 0 })
    or die "Can't open ${file}_: $DBI::errstr\n";

# Lookup tables indexed by column position (@COLS, @SCOLS) or by n-gram
# order (@TBS, @IDX).  The leading undef entries are padding so that the
# natural 1-based / order-based subscripts can be used directly.
my @COLS  = (undef, "word1", "word2", "word3", "word4");
my @TBS   = (undef, undef, "bigrams", "trigrams", "tetragrams");
my @SCOLS = (undef, "a", "b", "c", "d");
my @IDX   = (undef, undef, "bi", "tri", "tet");

my $table = $TBS[$size];

# Run a single SQL statement and abort with the driver's message if it
# fails.  Stopping here matters: the script later replaces the original
# file with the new database, which must not happen after a failed step.
my $run_sql = sub {
    my ($sql) = @_;
    my $rv = $dbh->do($sql);
    unless (defined $rv) {
        my $err = $dbh->errstr;
        $err = 'unknown error' unless defined $err;
        die "SQL statement failed ($sql): $err\n";
    }
    return;
};

print STDERR "Creating new table...\n";
$run_sql->("CREATE TABLE tmp ("
           . join(",", map { "$_ INTEGER" } @COLS[1 .. $size])
           . ",occs INTEGER)");

print STDERR "Attaching old table...\n";
# Let DBI quote the path so that quote characters in the file name cannot
# break (or alter) the statement.
$run_sql->("ATTACH DATABASE " . $dbh->quote($file) . " AS O");

print STDERR "Selecting ordered values into new table...\n";
$run_sql->("INSERT INTO tmp SELECT * FROM O.$table ORDER BY occs DESC");

print STDERR "Detaching old table...\n";
$run_sql->("DETACH DATABASE O");

print STDERR "Renaming temporary table...\n";
$run_sql->("ALTER TABLE tmp RENAME TO $table");

# One index per word column, named "<bi|tri|tet>_<a|b|c|d>".
for my $n (1 .. $size) {
    my $idx = $IDX[$size] . "_" . $SCOLS[$n];
    print STDERR "Creating idx $idx\n";
    $run_sql->("CREATE INDEX $idx ON $table($COLS[$n])");
}

print STDERR "Moving file...\n";
# Close the connection before the database file is renamed.
$dbh->disconnect;
# Use the rename builtin instead of shelling out to mv: the file name is
# never parsed by a shell (spaces and metacharacters are safe) and a
# failure is reported instead of being silently ignored.
rename("${file}_", $file)
    or die "Can't move ${file}_ to $file: $!\n";


=encoding UTF-8

=head1 NAME

nat-ngramsIdx - Indexes a ngrams SQLite file

=head1 SYNOPSIS

   nat-ngramsIdx source.2.ngrams

=head1 DESCRIPTION

This is a utility tool to create indexes in ngrams SQLite
files. Normally you do not need to use it directly.

=head1 SEE ALSO

NATools documentation

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2007-2012 by Alberto Manuel Brandão Simões

=cut