#!/usr/bin/perl
# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

use strict;
use warnings;
use utf8;

use Getopt::Long;
use Pod::Usage;

use File::Basename;
use Lingua::BioYaTeA::PreProcessing;

# Command-line options collected by Getopt::Long, keyed by the long option
# name ('help', 'man', 'input-file', 'output-file').
my %options;

GetOptions(
    \%options,
    "help|?|h",
    "man|m",
    "input-file|i:s",
    "output-file|o:s",
) or pod2usage(2);

pod2usage(-exitstatus => 0, -verbose => 2) if $options{'man'};

# Neither the input file nor the output file is mandatory, so only an
# explicit request for help prints the usage message.
pod2usage(1) if $options{'help'};

my $in_list  = $options{'input-file'};
my $out_list = $options{'output-file'};

my $preProc = Lingua::BioYaTeA::PreProcessing->new();

# Destination handle: the requested output file, or standard output when no
# output file was given.
my $fh;
if (defined $out_list) {
    # Three-argument open keeps the file name from being interpreted as part
    # of the open mode; $! reports why the open failed.
    open($fh, '>', $out_list)
        or die "can not open file $out_list to record corrected file: $!";
} else {
    $fh = \*STDOUT;
}

# $in_list is undef when no input file was given; the POD states that input
# is then read from stdin -- NOTE(review): that behaviour lives in
# Lingua::BioYaTeA::PreProcessing::process_file, confirm there.
$preProc->process_file($in_list, $fh);

if (defined $out_list) {
    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($fh) or die "can not close file $out_list: $!";
}
=encoding utf8

=head1 NAME

preProcessingRewriting - Perl script for rewriting the POS-tagged terms provided by TreeTagger.

=head1 SYNOPSIS

preProcessingRewriting [--help] [--man] [--input-file input_file] [--output-file output_file]

=head1 OPTIONS

=over 4

=item    B<--help>, B<-h>, B<-?>              brief help message

=item    B<--man>, B<-m>                   full documentation

=item    B<--input-file> I<input_file>, B<-i> I<input_file>              BioYaTeA input file in TreeTagger output format

=item    B<--output-file> I<output_file>, B<-o> I<output_file>             Rewriting output file (TreeTagger format)


=back

=head1 DESCRIPTION

This script performs the pre-processing of the TreeTagger output in
order to improve the extraction of both terms containing prepositional
phrases (with TO and AT prepositions) and terms containing participles
(past participles -ED and gerunds -ING).
Context-based rules are applied to the POS tags either to trigger the
extraction of relevant structures or to prevent the extraction of
irrelevant ones. The modified file becomes a new input file for
BioYaTeA.

If no input file is specified, the input data are read from stdin.
If no output file is specified, the output data are printed to stdout.

=head1 INPUT/OUTPUT FILE FORMATS

See Documentation in Lingua::YaTeA


=head1 EXAMPLES

preProcessingRewriting -i examples/sampleEN.ttg -o examples/sampleEN-prepro

preProcessingRewriting < examples/sampleEN.ttg > examples/sampleEN-prepro


=head1 SEE ALSO

Documentation of Lingua::BioYaTeA::PostProcessing, Lingua::BioYaTeA and Lingua::YaTeA 

=head1 AUTHORS

Wiktoria Golik <wiktoria.golik@jouy.inra.fr>, Zorana Ratkovic <Zorana.Ratkovic@jouy.inra.fr>, Robert Bossy <Robert.Bossy@jouy.inra.fr>, Claire Nédellec <claire.nedellec@jouy.inra.fr>, Thierry Hamon <thierry.hamon@univ-paris13.fr>

=head1 LICENSE

Copyright (C) 2012 Wiktoria Golik, Zorana Ratkovic, Robert Bossy, Claire Nédellec and Thierry Hamon

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.


=cut