#!/usr/bin/perl -ws
use strict; use warnings;
use Text::Perfide::BookSync;
use utf8::all;
# Command-line switches. They are package globals because perl's -s option
# (see the shebang line) fills them in before the script body runs.
our($split,$mark,$noclean,$html,$rm,$num,$pfile,$dump,$localrc,$dir,$outfile,$rep);
# NOTE(review): $rm is given a default here but is never copied into $options
# anywhere in this file -- confirm how (or whether) the library sees it.
$rm //= 0;
my @pairs;
# my $BOOKSYNC_ENV;
# if defined($localrc) {$BOOKSYNC_ENV = load_localrc('.'); }
# else {$BOOKSYNC_ENV = { EDITOR => 'vim', BROWSER => 'firefox' }}

# Options hash handed to the Text::Perfide::BookSync functions called below.
my $options = {};
$options->{dir}     = $dir;
$options->{dump}    = 1 if $dump;
$options->{num}     = 1 if $num;
# Use defined() so this test agrees with the print_outfile() guard at the end
# of the script; a false-but-defined value must not reach it without a name.
$options->{outfile} = $outfile if defined($outfile);
$options->{noclean} = 1 if $noclean;
$options->{outlist} = [];

my $fileL = shift or die "Please provide a file as argument";
my $fileR = shift;

if ($fileL =~ /bpairs$/ and not defined($fileR)) {
    # Single argument ending in "bpairs": a list of tab-separated file pairs.
    print STDERR "Argument '$fileL' contains list of pairs of files to align.\n";
    my $bpairs = $fileL;
    open(my $pairs_fh, '<', $bpairs)
        or die "Could not open file '$bpairs': $!";
    @pairs = <$pairs_fh>;
    close($pairs_fh);
}
elsif (defined($fileR)) {
    # Two arguments: align exactly this pair.
    print STDERR "Aligning sections from '$fileL' and '$fileR'.\n";
    push @pairs, "$fileL\t$fileR\n";
}
else {
    # A lone argument that is not a pairs list used to be ignored silently.
    die "Cannot align single file '$fileL': "
      . "give either a '.bpairs' list or two files\n";
}

foreach my $pair (@pairs) {
    chomp $pair;
    # Skip blank (or whitespace-only) lines and comment lines.
    next if ($pair =~ /^\s*$/ or $pair =~ /^#/);

    my ($left_file, $right_file) = split /\t/, $pair;
    unless (defined($left_file)  and length($left_file)
        and defined($right_file) and length($right_file)) {
        warn "Skipping malformed pair (expected 'left<TAB>right'): '$pair'\n";
        next;
    }

    print STDERR "Synchronizing files:\n\tleft: '$left_file' and\n\tright: '$right_file'\n";
    my $tabsec = {
        left  => { file => $left_file,  secs => populate($left_file)  },
        right => { file => $right_file, secs => populate($right_file) },
    };
    moreinfosecs($tabsec,$options);
    my $chunks = calchunks($tabsec,$left_file,$right_file,$options);
    moreinfochunks($chunks,$tabsec,$options);
    print_report($chunks,$tabsec) if ($rep);
    splitchunks($chunks,$left_file,$right_file,$options) if $split;
    # marksync runs unconditionally: the -mark switch is not consulted here.
    marksync($chunks,$tabsec,$left_file,$right_file,$options);
    htmlmatrix($chunks,$tabsec,$options) if $html;
}
print_outfile($options) if defined($outfile);
# print_report($chunks, $tabsec)
#
# Prints a plain-text summary of every chunk to the currently selected output
# handle. For each chunk it lists the ids of the left/right sections it
# contains, the word counts of both sides (with a warning when the chunk is
# small or the counts diverge), and the title of the first section on each
# side with newlines flattened to spaces.
#
#   $chunks - array ref of chunks; each has {left}/{right} entries holding
#             {secs} (indices into the section tables) and {wc} (word count)
#   $tabsec - hash ref with {left}{secs} and {right}{secs} section tables,
#             whose entries provide {id} and {title}
#
# A third argument is accepted for signature compatibility but is not used.
sub print_report {
    my ($chunks,$tabsec,$options) = @_;
    my @sides = qw(left right);

    for my $chunk (@{$chunks}) {
        print '--------------------',"\n";

        # Section ids, one line per side.
        print "sections:\n";
        for my $side (@sides) {
            my @ids = map { $tabsec->{$side}{secs}[$_]{id} }
                          @{ $chunk->{$side}{secs} };
            print "\t$side: ";
            print join(', ', @ids);
            print "\n";
        }

        # Word counts, with a diagnostic when they look suspicious.
        my $wc_left  = $chunk->{left}{wc};
        my $wc_right = $chunk->{right}{wc};
        my $warn     = '';
        if ($wc_left < 100 and $wc_right < 100) {
            $warn = 'WARNING: Small chunk (nr words < 100)';
        }
        else {
            # Only computed for chunks that are not small on both sides.
            my $ratio = _ratio_mM($wc_left,$wc_right);
            if    ($ratio > 1.0)  { $warn = 'ERROR: Word count ratio > 100%'; }
            elsif ($ratio > 0.25) { $warn = 'WARNING: Word count ratio > 25%'; }
        }
        print "words:\t$warn\n\tleft: $wc_left\n";
        print "\tright: $wc_right\n";

        # Title of the first section on each side, flattened to one line.
        my %title_of;
        for my $side (@sides) {
            my $first = $chunk->{$side}{secs}[0];
            ($title_of{$side} = $tabsec->{$side}{secs}[$first]{title}) =~ s/\n/ /g;
        }
        print "beginning:\n\tleft: $title_of{left}\n";
        print "\tright: $title_of{right}\n";
    }
}
# _ratio_mM($a, $b)
#
# Relative difference between two counts, measured against the smaller one:
# (max - min) / min. The argument order does not matter.
#
# Returns 0 when the two counts are equal (including both zero) and +infinity
# when exactly one of them is zero, so that callers comparing the result
# against a threshold see "larger than anything" instead of the script dying
# with "Illegal division by zero".
sub _ratio_mM {
    my ($min,$max) = @_;
    ($min,$max) = ($max,$min) if ($min > $max);

    return 0       if $max == $min;
    return 9**9**9 if $min == 0;    # overflows to +Inf
    return ($max-$min)/$min;
}
# print_outfile($options)
#
# Writes every entry of $options->{outlist} to the file named by
# $options->{outfile}, one entry per line, replacing any previous content.
#
# Dies if no file name is given, or if the file cannot be opened or closed
# (the system error is included in the message). Returns 1 on success.
sub print_outfile {
    my $options = shift;
    my $outfile = $options->{outfile};
    (defined($outfile) and length($outfile))
        or die "No output file name given\n";

    open my $file, '>', $outfile or die "Could not open '$outfile': $!\n";
    foreach my $entry (@{ $options->{outlist} || [] }) {
        print {$file} "$entry\n";
    }
    # Buffered write errors only surface at close, so check it.
    close $file or die "Could not close '$outfile': $!\n";
    return 1;
}
=head1 NAME
syncbooks - synchronizes books based on section marks produced with Text::Perfide::BookCleaner
=head1 SYNOPSIS
syncbooks [options] file.bpairs
syncbooks [options] file1 file2
=head1 DESCRIPTION
Synchronizes two books (file1 and file2) or several pairs of books,
passed in a file with extension "bpairs", each pair in one line with names separated by tab.
=head1 Options
-split splits file1 and file2 in numbered files (chunks) where each
file1.lXXX matches file2.rXXX
-mark inserts synchronization marks <sync id="..."> and generates
file1.sync and file2.sync. This is the default.
-rm=n do not output the first n chunks to the sync files
(use with -mark)
-noclean do not remove any section marks left after synchronizing (default is to remove)
-html create a C<teste.html> file with the alignment matrix.
-num ignore section type, use only section numbering to align
-dump generate file with Dumper from secs and chunks (debug only)
=head1 AUTHOR
Andre Santos, andrefs@cpan.org
=head1 SEE ALSO
perl(1).
Text::Perfide::BookCleaner(3)
=cut