The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -ws
use strict; use warnings;
use Text::Perfide::BookSync;
use utf8::all;

# Command-line switches. They are package globals because the -s switch on
# the shebang line makes perl set $main::<name> for every -name or
# -name=value argument that precedes the file names.
our($split,$mark,$noclean,$html,$rm,$num,$pfile,$dump,$localrc,$dir,$outfile,$rep);
$rm //= 0;
my  @pairs;		# lines of the form "left_file<TAB>right_file"

# my $BOOKSYNC_ENV;
# if defined($localrc)	{$BOOKSYNC_ENV = load_localrc('.'); }
# else					{$BOOKSYNC_ENV = { EDITOR => 'vim', BROWSER => 'firefox' }}

# Options hash handed to the synchronization routines called below.
my $options = {};
$options->{dir}  	= $dir;
$options->{dump} 	= 1 if $dump;
$options->{num}  	= 1 if $num;
$options->{outfile} = $outfile if $outfile;
$options->{noclean} = 1 if $noclean;
# Lines that print_outfile() writes to -outfile at the end of the run;
# presumably filled in by the synchronization routines (not visible here).
$options->{outlist} = [];
my $fileL = shift or die "Please provide a file as argument";
my $fileR = shift;

if($fileL =~ /bpairs$/ and not defined($fileR)){
	# Single argument ending in "bpairs": every line names one pair of files.
	print STDERR "Argument '$fileL' contains list of pairs of files to align.\n";
	my $bpairs = $fileL;
	open(my $pairs_fh,'<',$bpairs) or die "Could not open file '$bpairs': $!";
	@pairs = <$pairs_fh>;
	close $pairs_fh;
}
elsif(defined($fileR)){
	# Two arguments: a single pair given directly on the command line.
	print STDERR "Aligning sections from '$fileL' and '$fileR'.\n";
	push @pairs, "$fileL\t$fileR\n";
}
else {
	# One argument that is not a pairs list: there is nothing to align.
	# Only warn, so that the rest of the script runs exactly as before.
	warn "Nothing to do: '$fileL' is not a bpairs file and no second file was given.\n";
}

foreach my $pair (@pairs){
	chomp $pair;
	next if ($pair =~ /^$/ or $pair =~ /^#/);	# skip blank lines and comments
	my ($fileL,$fileR) = split "\t", $pair;
	unless (defined $fileR) {
		# A line without a tab names only one file; it cannot be aligned.
		warn "Skipping malformed pair line (expected 'left<TAB>right'): '$pair'\n";
		next;
	}
	print STDERR "Synchronizing files:\n\tleft:  '$fileL' and\n\tright: '$fileR'\n";
	my $tabsec = { 	left  => { 	file => $fileL,
								secs => populate($fileL) },
					right => {	file => $fileR,
								secs => populate($fileR) }
		};
	
	moreinfosecs($tabsec,$options);
	my $chunks = calchunks($tabsec,$fileL,$fileR,$options);
	moreinfochunks($chunks,$tabsec,$options);
	print_report($chunks,$tabsec) if ($rep);
	
	splitchunks($chunks,$fileL,$fileR,$options) if $split;
	# Marking is unconditional: the -mark switch is not consulted here.
	marksync($chunks,$tabsec,$fileL,$fileR,$options);
	htmlmatrix($chunks,$tabsec,$options) if $html;
}

print_outfile($options) if defined($outfile);

# print_report($chunks, $tabsec)
#
# Prints a plain-text summary of every chunk in @$chunks to the currently
# selected output handle (STDOUT unless changed with select()).
#
#   $chunks  - array ref of chunks; each chunk is read as
#              { left  => { secs => [section indexes], wc => word count },
#                right => { secs => [...],             wc => ...        } }
#   $tabsec  - { left => { secs => [ { id => ..., title => ... }, ... ] },
#                right => { ... } }; the indexes stored in a chunk refer to
#              positions in these 'secs' arrays
#   $options - accepted for symmetry with the other routines; not used
#
# For each chunk it prints the ids of its sections, the word count of each
# side together with a warning when the counts look suspicious, and the
# title of the first section on each side.
sub print_report {
	my ($chunks,$tabsec,$options) = @_;
	foreach my $c (@$chunks) {
		# A side without a section list is treated as having no sections.
		my @secs_left  = @{ $c->{left}{secs}  || [] };
		my @secs_right = @{ $c->{right}{secs} || [] };

		print '--------------------',"\n";
		print "sections:\n";
		print "\tleft:  ";
		print join ', ', map { $tabsec->{left}{secs}[$_]{id} } @secs_left;
		print "\n";
		print "\tright: ";
		print join ', ', map { $tabsec->{right}{secs}[$_]{id} } @secs_right;
		print "\n";

		my $wc_left  = $c->{left}{wc};
		my $wc_right = $c->{right}{wc};

		my $warn = '';
		if ($wc_left < 100 and $wc_right < 100) {
			$warn = 'WARNING: Small chunk (nr words < 100)';
		}
		else {
			# The ratio is only meaningful (and only computed) for chunks
			# that are not small on both sides.
			my $ratio = _ratio_mM($wc_left,$wc_right);
			if    ($ratio > 1.0 ) { $warn = 'ERROR: Word count ratio > 100%';  }
			elsif ($ratio > 0.25) { $warn = 'WARNING: Word count ratio > 25%'; }
		}

		print "words:\t$warn\n\tleft:  $wc_left\n";
		print "\tright:  $wc_right\n";

		# Title of the first section on each side. When a side has no
		# sections there is no first index; using undef as an array index
		# would silently select element 0, i.e. an unrelated section, so an
		# empty title is printed instead.
		my $titleL = @secs_left  ? $tabsec->{left}{secs}[$secs_left[0]]{title}   : undef;
		my $titleR = @secs_right ? $tabsec->{right}{secs}[$secs_right[0]]{title} : undef;
		$titleL //= '';
		$titleR //= '';
		$titleL =~ s/\n/ /g;
		$titleR =~ s/\n/ /g;
		print "beginning:\n\tleft:  $titleL\n";
		print "\tright: $titleR\n";
	}
}

# _ratio_mM($a, $b)
#
# Relative difference between two counts, measured against the smaller one:
# (max - min) / min. The arguments may be given in either order.
#
# Returns 0 when both values are equal (including 0 and 0) and +infinity
# when only the smaller value is 0, so that callers comparing the result
# against a threshold see "arbitrarily large" instead of dying with
# "Illegal division by zero".
sub _ratio_mM {
	my ($l,$h) = @_;
	($l,$h) = ($h,$l) if ($l > $h);		# make $l the smaller value
	return 0       if ($h == $l);
	return 9**9**9 if ($l == 0);		# numeric overflow yields +Inf
	return ($h-$l)/$l;
}

# print_outfile($options)
#
# Writes every element of @{ $options->{outlist} } to the file named by
# $options->{outfile}, one element per line. An existing file is
# overwritten.
#
# Dies if the file cannot be opened, or if closing it fails. Output is
# buffered, so a failed write (for instance a full disk) may only be
# reported by close.
sub print_outfile {
	my $options = shift;
	my $outfile = $options->{outfile};
	open my $file, '>', $outfile or die "Could not open '$outfile': $!\n";
	foreach my $entry (@{$options->{outlist}}){
		print {$file} "$entry\n";
	}
	close $file or die "Could not write '$outfile': $!\n";
}

=head1 NAME

syncbooks - synchronizes books based on section marks produced with Text::Perfide::BookCleaner

=head1 SYNOPSIS

 syncbooks [options] file.bpairs

 syncbooks [options] file1 file2

=head1 DESCRIPTION

Synchronizes two books (file1 and file2) or several pairs of books,
    passed in a file with extension "bpairs", each pair in one line with names separated by tab.

=head1 Options

 -split  splits file1 and file2 into numbered files (chunks) where each
     file1.lXXX matches file2.rXXX

 -mark inserts synchronization marks <sync id="..."> and generates 
     file1.sync and file2.sync. This is the default.

 -rm=n do not output the first n chunks to the sync files 
     (use with -mark)

 -noclean  do not remove any section marks left after synchronizing (default is to remove)

 -html   creates a C<teste.html> file with the alignment matrix.

 -num   ignore section type, use only section numbering to align

 -dump generate file with Dumper from secs and chunks (debug only)

=head1 AUTHOR

Andre Santos, andrefs@cpan.org

=head1 SEE ALSO

perl(1).

Text::Perfide::BookCleaner(3)

=cut