The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Scrapar::Backend::_base;

use strict;
use warnings;
use Data::Dumper;
use Digest::MD5 qw(md5_hex);
use base 'Scrapar::Var';
use Scrapar::Mechanize;

# Build an identifier from a value.
#
# Called as a method; the invocant is not used. Returns the lowercase
# hexadecimal MD5 digest of the first argument after the invocant.
sub _make_id {
    my ($self, $source) = @_;

    return md5_hex($source);
}

# Constructor.
#
# Takes an optional hash reference and reads two keys from it:
#   cache_expires_in - forwarded to the Scrapar::Mechanize object
#   start_url        - stored on the new object
#
# May be invoked on a class name or on an existing instance; in the latter
# case the new object is blessed into that instance's class.
sub new {
    my ($class, $params_ref) = @_;

    my $mech_class = 'Scrapar::Mechanize';
    my $mech       = $mech_class->new();
    $mech->cache_expires_in($params_ref->{cache_expires_in});

    my $target_class = ref($class) || $class;

    my %object = (
        start_time => time(),
        start_url => $params_ref->{start_url},
        m => $mech,
        data => undef,
        logger => $ENV{SCRAPER_LOGGER},
    );

    return bless \%object, $target_class;
}

# Read-only accessor for the value stored under the object's 'logger' key.
sub logger {
    my ($self) = @_;

    return $self->{logger};
}

# Apply extractors and data handlers
#
# Passes $data through every filter in @filters, in order, and returns the
# resulting value.
#
# Arguments (after the invocant):
#   $data    - the value handed to the first filter
#   @filters - zero or more filters, each in one of the two forms below
#
# A filter that throws is reported with warn() and skipped: $data keeps the
# value it had before that filter and the remaining filters still run.
# Filters of any other shape, and hash filters whose name matches neither
# prefix, are ignored.
sub apply {
    my $self    = shift;
    my $data    = shift;
    my @filters = @_;

    # Optional time budget: once more than SCRAPER_MAX_TIME seconds have
    # passed since $self->{start_time}, the whole process exits.
    if (exists $ENV{SCRAPER_MAX_TIME}
        && time() - $self->{start_time} > $ENV{SCRAPER_MAX_TIME}) {
        print "Time limit is reached. Exiting ...\n";
        exit;
    }

    #
    # The filter args follow two types:
    #
    # 1.
    #
    # {
    #   name => 'blah', # module name, prefixed with E:: or D::
    #   new => { },     # the args to new()
    #   args => { }     # the args to extract() or to handle()
    # }
    #
    # 2.
    #
    # sub { my $self = shift; my $data = shift; }
    #

    for my $filter (@filters) {
        my $kind = ref $filter;

        my $ok = eval {
            if ($kind eq 'HASH') {
                my $name = defined $filter->{name} ? $filter->{name} : '';
                my ($module, $method);

                # NOTE(review): (.+) never captures an empty string, so the
                # 'RSS' / 'STDOUT' fallbacks only apply to a literal "0"
                # suffix; a bare "E::" or "D::" does not match at all.
                if ($name =~ m[^E::(.+)]) {
                    $module = 'Scrapar::Extractor::' . ($1 || 'RSS');
                    $method = 'extract';
                }
                elsif ($name =~ m[^D::(.+)]) {
                    $module = 'Scrapar::DataHandler::' . ($1 || 'STDOUT');
                    $method = 'handle';
                }

                if (defined $module) {
                    # Load the module with core require (by file path), so
                    # this does not depend on UNIVERSAL::require having been
                    # loaded somewhere else.
                    (my $file = "$module.pm") =~ s{::}{/}g;
                    require $file;

                    my $f = $module->new($filter->{new});
                    $data = $f->$method($data, $filter->{args});
                }
            }
            elsif ($kind eq 'CODE') {
                $data = $filter->($self, $data);
            }

            1;    # distinguishes success from a filter that returned false
        };
        my $error = $@;

        if (!$ok) {
            $error = defined $error && length "$error" ? "$error" : 'unknown error';
            $error =~ s/\s+\z//;

            my $label = $kind eq 'HASH' && defined $filter->{name}
                ? $filter->{name}
                : $kind;

            # Best effort: report the failure and carry on with the next filter.
            warn "Filter $label failed: $error\n";
        }
    }

    return $data;
}


# Combined getter/setter for the object's current data.
#
# With a defined argument, stores it under $self->{data}; false-but-defined
# values such as 0 and '' are stored too, so an empty result can replace
# stale data. With no argument, or with undef, nothing is stored and the
# call acts as a plain getter.
#
# Always returns the (possibly updated) stored value.
sub data {
    my $self = shift;

    $self->{data} = $_[0] if defined $_[0];

    return $self->{data};
}

# Run one filter over the stored data and keep the result.
#
# $params_ref is passed to apply() as the only filter; whatever apply()
# returns replaces $self->{data}. Returns the invocant, so calls can be
# chained.
sub extract {
    my ($self, $params_ref) = @_;

    my $extracted = $self->apply($self->{data}, $params_ref);
    $self->{data} = $extracted;

    return $self;
}

# Pass the stored data to a data handler.
#
# $params_ref is a filter as accepted by apply(): a hash reference
# describing the handler, or a code reference. The value apply() returns is
# discarded; $self->{data} is left as it was. Returns the invocant.
#
# When the DEFAULT_DATAHANDLER environment variable is set to a true value,
# it replaces the 'name' of a hash-style filter. The override is made on a
# copy, so the caller's hash is never modified. Code-reference filters have
# no name to override and are passed through unchanged.
sub handle {
    my $self       = shift;
    my $params_ref = shift;

    if ($ENV{DEFAULT_DATAHANDLER} && ref $params_ref ne 'CODE') {
        $params_ref = {
            %{ $params_ref || {} },
            name => $ENV{DEFAULT_DATAHANDLER},
        };
    }

    $self->apply($self->{data}, $params_ref);
    return $self;
}

# Handle a single record.
#
# Replaces the stored data with a one-element array reference holding $r,
# then delegates to handle() with $args and returns what handle() returns.
sub handle_one_item {
    my ($self, $r, $args) = @_;

    $self->{data} = [ $r ];

    return $self->handle($args);
}

# Debugging aid: prints the Data::Dumper rendering of $data to the
# currently selected output handle. The invocant is not used.
sub dumper {
    my ($self, $data) = @_;

    my $rendered = Dumper($data);

    return print $rendered;
}

# Placeholder entry point: this implementation always dies, so a subclass
# has to supply its own run().
sub run {
    my $complaint = "This method must be overridden";

    die $complaint;
}

1;

__END__

=pod

=head1 NAME

Scrapar::Backend::_base - The base class for building backends

=head1 COPYRIGHT

Copyright 2009-2010 by Yung-chung Lin

All rights reserved. This program is free software; you can
redistribute it and/or modify it under the same terms as Perl itself.

=cut