The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Lingua::TreeTagger::Installer;

use warnings;
use strict;

our $VERSION = '0.50_2';

use Clone 'clone';
use YAML;
use LWP::Simple;
use IO::Uncompress::Gunzip 'gunzip';
use File::Copy qw'move cp';
use File::Temp 'tempdir';
use File::Basename;
use Archive::Any;

our $data;

{
    local $/ = "";
    $data = Load( <DATA> );
}

sub custom_install {
    my ($self, $from, $to) = @_;

    $self->verbose(" - installing", basename($from), "into $to");
    $to .= "/" unless $to =~ m{/$};
    $to .= basename($from);
    if (-f $to && not $self->{force}) {
        die "Target file exists.\nProbably another version of parameter file was already installed.\nWon't install without force (-f)\n";
    }
    cp $from => $to or die "Installation failed. You have write permissions?\n";
}

sub install_to_cmd {
    my ($self, $file) = @_;

    require Lingua::TreeTagger::Installer::ConfigData;
    my $treebin = Lingua::TreeTagger::Installer::ConfigData->config('treebin');

    $self->custom_install($file, $treebin);
    chmod 0755, "$treebin/" . basename($file);
}

sub install_to_bin {
    my ($self, $file) = @_;

    require Lingua::TreeTagger::Installer::ConfigData;
    my $bin = Lingua::TreeTagger::Installer::ConfigData->config('bin');

    $self->custom_install($file, $bin);
    chmod 0755, "$bin/" . basename($file);
}

sub install_to_lib {
    my ($self, $file) = @_;

    require Lingua::TreeTagger::Installer::ConfigData;
    my $treelib = Lingua::TreeTagger::Installer::ConfigData->config('treelib');

    $self->custom_install($file, $treelib);
}

sub new {
    my $class = shift;
    my %ops = @_;
    local $/ = "";
    open R, "<", "$ENV{HOME}/.treetagger" or die "Can't find .treetagger config file\n";
    my $data = Load( <R> );
    close R;

    my $self = { data => $data };

    for my $l (keys %{$data->{languages}}) {
        for my $e (@{$data->{languages}{$l}}) {
            $self->{lookup}{$e->{id}} = clone $e;
            $self->{lookup}{$e->{id}}{lang} = $l;
        }
    }

    $self->{verbose}++ if $ops{verbose};
    $self->{force}++   if $ops{force};

    return bless $self => $class
}

sub verbose {
    my ($self, @message) = @_;
    if ($self->{verbose}) {
        print STDERR join(" " => @message), "\n";
    }
}

sub list_parameter_files {
    my $self = shift;
    my $count = 0;
    print "Listing available and installed parameter files\n";
    for my $lang (keys %{$self->{data}{languages}}) {
        for my $par (@{$self->{data}{languages}{$lang}}) {
            my $author = exists($par->{author}) ? " by $par->{author}" : "";
            my $parfile = exists($par->{parfile}) ? $par->{parfile} : undef;
            if ($parfile) {
                print "$par->{id} [INSTALLED] $lang ($par->{encoding})$author as $parfile.\n";
            } else {
                print "$par->{id} [AVAILABLE] $lang ($par->{encoding})$author.\n";
            }
        }
    }
}

sub installed {
    my ($self, $parameter, $filename) = @_;
    my $l = $self->{lookup}{$parameter}{lang};
    for my $e (@{$self->{data}{languages}{$l}}) {
        $e->{parfile} = $filename if $e->{id} eq $parameter;
    }
    $self->save;
}

sub install {
    my ($self, $parameter) = @_;

    $parameter = uc $parameter;
    return "unknown code" unless exists($self->{lookup}{$parameter});

    my $url  = $self->{lookup}{$parameter}{url};
    my $lang = $self->{lookup}{$parameter}{lang};
    my $filename = $url;
    $filename =~ s{^.*/}{};

    $self->verbose(" - downloading into $filename");
    getstore $url => $filename;

    if (exists($self->{lookup}{$parameter}{manifest})) {
        if ($filename =~ /tar.gz$/) {
            my $tgz = Archive::Any->new( $filename );

            my $dir = tempdir CLEANUP => 1;
            $tgz->extract( $dir );

            unlink $filename;
            my $files = $self->{lookup}{$parameter}{manifest};
            for my $f (@$files) {
                if ($f =~ m!^lib/!) {
                    $self->install_to_lib("$dir/$f");
                    if ($f =~ /par$/) {
                        $f =~ s{.*/}{};
                        $self->installed($parameter, $f);
                    }
                }
                if ($f =~ m!^bin/!) {
                    my $t = $f;
                    $t =~ s/^bin/cmd/;
                    move "$dir/$f" => "$dir/$t";
                    $f = $t;
                }
                if ($f =~ m!^cmd/!) {
                    if ($f =~ m{/tree-tagger-[a-z]+$}) {
                        $self->install_to_bin("$dir/$f")
                    }
                    else {
                        $self->install_to_cmd("$dir/$f")
                    }
                }
            }
        }
    }
    else {
        my $utf = $self->{lookup}{$parameter}{encoding} =~ /utf.?8/i ? "-utf8" : "";

        gunzip $filename => "$lang$utf.par" or die "Can't gunzip $filename";

        $self->install_to_lib("$lang$utf.par");

        unlink $filename;
        unlink "$lang$utf.par";
        $self->installed($parameter, "$lang$utf.par");
    }

    return 0;
}


sub save {
    my $self = shift;
    open CFG, ">", "$ENV{HOME}/.treetagger" or die "Can't create file\n";
    print CFG Dump($self->{data});
    close CFG;
}

sub create_cfg {
    open CFG, ">", "$ENV{HOME}/.treetagger" or die "Can't create file\n";
    $data->{version} = $VERSION;
    print CFG Dump($data);
    close CFG;
}

1;

=encoding utf8

=head1 NAME

Lingua::TreeTagger::Installer - An installer tool for TreeTagger

=head1 DESCRIPTION

This module is an auxiliary module for C<tree-tagger-install-lang>
command line script.

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010-2012 by Alberto Manuel Brandão Simões

=cut

__DATA__
platforms:
  linux: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/tree-tagger-linux-3.2.tar.gz
  linux64: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/tree-tagger-linux-3.2-64bit.tar.gz
#  sparc-solaris: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/tree-tagger-3.2.tar.gz
#  macosx-ppc: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/tree-tagger-MacOSX-3.2.tar.gz
  macosx-intel: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/tree-tagger-MacOSX-3.2-intel.tar.gz
languages:
  bulgarian:
    - id: BG-1
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/bulgarian-par-linux-3.1.bin.gz
  dutch:
    - id: NL-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/dutch-par-linux-3.1.bin.gz
    - id: NL-2
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/dutch2-par-linux-3.1.bin.gz
      author: Julien Bioche
  english:
    - id: EN-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/english-par-linux-3.1.bin.gz
  french:
    - id: FR-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/french-par-linux-3.2.bin.gz
    - id: FR-2
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/french-par-linux-3.2-utf8.bin.gz
  italian:
    - id: IT-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/italian-par-linux-3.1.bin.gz
    - id: IT-2
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/italian-par-linux-3.2-utf8.bin.gz
    - id: IT-3
      encoding: ISO-8859-1
      author: Marco Beroni
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/italian-par2-linux-3.1.bin.gz
  spanish:
    - id: ES-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/spanish-par-linux-3.1.bin.gz
    - id: ES-2
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/spanish-par-linux-3.2-utf8.bin.gz
  estonian:
    - id: ET-1
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/estonian-par-linux-3.2.bin.gz
  swahili:
    - id: SW-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/swahili-par-linux-3.2.bin.gz
  latin:
    - id: LA-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/latin-par-linux-3.2.bin.gz
  german:
    - id: DE-1
      encoding: ISO-8859-1
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/german-par-linux-3.2.bin.gz
    - id: DE-2
      encoding: UTF-8
      url: ftp://ftp.ims.uni-stuttgart.de/pub/corpora/german-par-linux-3.2-utf8.bin.gz
  portuguese:
    - id: PT-1
      encoding: ISO-8859-1
      author: Pablo Gamallo
      url: http://gramatica.usc.es/~gamallo/tagger/tree-taggerPT-GZ.tar.gz
      manifest:
        - lib/pt.par
        - lib/portuguese-abbreviations
        - cmd/tree-tagger-portuguese
        - bin/tokenizer-gz.perl
  galician:
    - id: GL-1
      encoding: ISO-8859-1
      author: Pablo Gamallo
      url: http://gramatica.usc.es/~gamallo/tagger/tree-taggerPT-GZ.tar.gz
      manifest:
        - lib/gz.par
        - cmd/tree-tagger-galicien