# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Scrapar::Extractor::TableExtract;

use strict;
use warnings;
use HTML::TableExtract;
use base qw(Scrapar::Extractor::_base);

# Extract the text of a single table from an HTML document.
#
# Arguments:
#   $content    - HTML source handed to HTML::TableExtract's parse().
#   $params_ref - hash reference whose {x} and {y} entries are compared
#                 numerically with the pair returned by each table's
#                 coords() method; the first table matching both is used.
#
# When $self->{keep_html} is true the parser is built with keep_html => 1.
#
# Returns the cleaned cell texts of the selected table as one string: the
# cells of a row are concatenated without a separator and every row is
# followed by "\n\n".  Returns undef if the selected table has no rows.
# Returns nothing (undef in scalar context, the empty list in list context)
# when no table matches the requested coordinates.
sub extract {
    my $self = shift;
    my $content = shift;
    my $params_ref = shift;

    my $te = HTML::TableExtract->new($self->{keep_html} ? (keep_html => 1) : ());
    $te->parse($content);

    foreach my $ts ($te->tables) {
        my ($x, $y) = $ts->coords;
        next unless $x == $params_ref->{x} && $y == $params_ref->{y};

        my $output;
        foreach my $row ($ts->rows) {
            foreach my $cell (@{$row}) {
                # Skip missing and empty cells.  A plain truthiness test
                # would also discard a cell whose text is "0".
                next unless defined $cell && length $cell;

                # Work on a copy so the parser's row data is left untouched.
                my $text = $cell;
                $text =~ s[\A\s+][];          # trim leading whitespace
                $text =~ s[\s+\z][];          # trim trailing whitespace
                $text =~ s[\r][]g;            # drop carriage returns
                $text =~ s[^\s*\r?\n$][\n]mg; # whitespace-only lines -> "\n"
                $text =~ s[\n+][\n]g;         # collapse runs of newlines
                $output .= $text;
            }
            $output .= "\n\n";
        }
        return $output;
    }

    # No table matched the requested coordinates.
    return;
}

1;

__END__

=pod

=head1 NAME

Scrapar::Extractor::TableExtract - Table extractor

=head1 COPYRIGHT

Copyright 2009-2010 by Yung-chung Lin 

All rights reserved. This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut