The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
=head1 NAME

WWW::TV::Episode - Parse TV.com for TV Episode information.

=head1 SYNOPSIS

  use WWW::TV::Episode qw();
  my $episode = WWW::TV::Series->new(id => '475567');

  # with optional paramers

  print $episode->summary;

=head1 DESCRIPTION

The L<WWW::TV::Episode> module parses TV.com episode information using
L<LWP::UserAgent>. Unfortunately I can't see a way to search for an episode
by name, so I haven't implemented it. It is probably possible to do so if you
populate a series object and grep $series->episodes for the episode name you
are searching for.

=head1 METHODS

=cut

package WWW::TV::Episode;
use strict;
use warnings;

our $VERSION = '0.14';

use Carp qw(croak);
use LWP::UserAgent qw();

=head2 new

    The new() method is the constructor. It takes the id of the show
    assuming you have previously looked that up.

        # default usage
        my $episode = WWW::TV::Episode->new(id => 924072);

        # change user-agent from the default of "libwww-perl/#.##"
        my $episode = WWW::TV::Episode->new(id => 924072, agent => 'WWW::TV');

    It also (optionally) takes the name of the episode. This is not used
    in any way to search for the episode, but is used as initial data
    population for that field so that the html isn't parsed if you only
    want an object with the name. This is used by the L<WWW::TV::Series>
    object to populate a big array of episodes that have names without
    needing to fetch any pages.

        # pre-populate episode name
        my $episode = WWW::TV::Episode->new(id => 924072, name => 'Run!');

=cut

sub new {
    my $class = ref $_[0] ? ref(shift) : shift;

    my %data;

    if (@_ == 1) {
        $data{id} = shift;
    }
    elsif (scalar(@_) % 2 == 0) {
        %data = @_;
    }

    croak 'No id given to constructor' unless exists $data{id};
    croak "Invalid id: $data{id}" unless ($data{id} =~ /^\d+$/ && $data{id});

    return bless {
        id     => $data{id},
        name   => $data{name},
        _agent => $data{agent},
        _site  => $data{site},
        filled => {
            id => 1,
            $data{name}
                ? (name => 1)
                : (),
        },
    }, $class;
}

=head2 id

    The ID of this episode, according to TV.com

=cut

sub id {
    my $self = shift;

    return $self->{id};
}

=head2 name

    Returns a string containing the name of the episode.

=cut

sub name {
    my $self = shift;

    unless (exists $self->{filled}->{name}) {
        $self->{filled}->{name} = 1;
        ($self->{name}) = $self->_html =~ m{
            <h2\sclass="module_title">(.*)</h2>\n
            \s*<ul\sclass="ep_stats">
        }x;
    }

    return $self->{name};
}

=head2 summary

    Returns a string containing basic information about this series.

=cut

sub summary {
    my $self = shift;

    unless (exists $self->{filled}->{summary}) {
        $self->{filled}->{summary} = 1;
        ($self->{summary}) = $self->_html =~ m{
            <p\sclass="deck">(.*?)</p>
        }smx;
        $self->{summary} =~ s/<br ?\/?>/\n/g;
        $self->{summary} =~ s/<a href="[^"]+">.*?<\/a>//g;
        $self->{summary} =~ s/^\s*//;
        $self->{summary} =~ s/\s*$//;
    }

    return $self->{summary};
}

=head2 season_number

    Returns the season number that this episode appeared in.

=cut

sub season_number {
    my $self = shift;

    unless (exists $self->{filled}->{season_number}) {
        $self->_fill_vitals;
    }

    return $self->{season_number};
}

=head2 episode_number

    Returns the overall number of this episode. Note, this is not
    necessarily the production order of the episodes, but is the order
    in which they aired.

=cut

sub episode_number {
    my $self = shift;

    unless (exists $self->{filled}->{episode_number}) {
        $self->_fill_vitals;
    }

    return $self->{episode_number};
}

=head2 format_details ($format_str)

    Returns episode details using a special format string, similar to printf:
       %I - series ID
       %N - series name
       %s - season number
       %S - season number (0-padded to two digits, if required)
       %i - episode ID
       %e - episode number
       %E - episode number (0-padded to two digits, if required)
       %n - episode name
       %d - date episode first aired

    The default format is:
       %N.s%Se%E - %n (eg: "Heroes.s1e02 - Don't Look Back")

=cut

sub format_details {
    my $self = shift;

    my $format_str = shift || '%N.s%Se%E - %n';

    # format subs .. expecting $_[0] is $self
    my %formats = (
       'I'  => sub { $_[0]->series_id },
       'N'  => sub { $_[0]->series->name },
       's'  => sub { $_[0]->season_number },
       'S'  => sub { sprintf('%02d', $_[0]->season_number) },
       'i'  => sub { $_[0]->id },
       'e'  => sub { $_[0]->episode_number },
       'E'  => sub { sprintf('%02d', $_[0]->episode_number) },
       'n'  => sub { $_[0]->name },
       'd'  => sub { $_[0]->first_aired },
    );

    # substitution
    $format_str =~
            s/
                # look for single character format specifier
                %([a-zA-Z])
            /
                # use format sub if found, otherwise leave as-is
                $formats{$1} ? $formats{$1}->($self) : "\%$1"

            /sgex;

    return $format_str;
}

=head2 first_aired

    Returns a string of the date this episode first aired in ISO 8601 (yyyy-mm-dd) format.

=cut

sub first_aired {
    my $self = shift;

    unless (exists $self->{filled}->{first_aired}) {
        $self->_fill_vitals;
    }

    return $self->{first_aired};
}

=head2 stars

    Returns a list of the stars that appeared in this episode.

    # in scalar context, returns a comma-delimited string
    my $stars = $episode->stars;

    # in array context, returns an array
    my @stars = $episode->stars;

=cut

sub stars {
    my $self = shift;

    unless (exists $self->{filled}->{stars}) {
        my ($stars) = $self->_html =~ m{
          <dl\s*>\s*
          <dt>Stars?:</dt>\s*
          (<dd>.*?</dd>)\s*
          </dl>
        }x;

        $self->{stars} = $self->_parse_people($stars);
        $self->{filled}->{stars} = 1;
    }

    return $self->{stars};
}

=head2 guest_stars

    Returns a list of the guest stars that appeared in this episode.

    # in scalar context, returns a comma-delimited string
    my $guest_stars = $episode->guest_stars;

    # in array context, returns an array
    my @guest_stars = $episode->guest_stars;

=cut

sub guest_stars {
    my $self = shift;

    unless (exists $self->{filled}->{guest_stars}) {
        my ($stars) = $self->_html =~ m{
          <dl\s*(?:class="last")?\s*>\s*
          <dt>Guest\sStars?:</dt>\s*
          (<dd>.*?</dd>)\s*
          </dl>
        }x;

        $self->{guest_stars} = $self->_parse_people($stars);
        $self->{filled}->{guest_stars} = 1;
    }

    return $self->{guest_stars};
}

=head2 recurring_roles

    Returns a list of the people who have recurring roles
    that appeared in this episode

    # in scalar context, returns a comma-delimited string
    my $recurring_roless = $episode->recurring_roless;

    # in array context, returns an array
    my @recurring_roless = $episode->recurring_roless;

=cut

sub recurring_roles {
    my $self = shift;

    unless (exists $self->{filled}->{recurring_roles}) {
        my ($stars) = $self->_html =~ m{
          <dl\s*>\s*
          <dt>Recurring\sRoles?:</dt>\s*
          (<dd>.*?</dd>)\s*
          </dl>
        }x;

        $self->{recurring_roles} = $self->_parse_people($stars);
        $self->{filled}->{recurring_roles} = 1;
    }

    return $self->{recurring_roles};
}

sub _parse_people {
    my $self = shift;
    my $stars = shift or return;

    my @stars;
    for my $star (split /<\/dd>/, $stars) {
        next unless $star =~ m{<a href="[^"]+">(.*?)</a>};
        push @stars, $1;
    }

    return join(', ', @stars);
}

=head2 writers

    Returns a list of the people that wrote this episode.

    # in scalar context, returns a comma-delimited string
    my $writers = $episode->writers;

    # in array context, returns an array
    my @writers = $episode->writers;

=cut

sub writers {
    my $self = shift;

    unless (exists $self->{filled}->{writers}) {
        my ($writers) = $self->_html =~ m{
          <dl\s*>\s*
          <dt>Writers?:</dt>\s*
          (<dd>.*?</dd>)\s*
          </dl>
        }x;

        $self->{writers} = $self->_parse_people($writers);
        $self->{filled}->{writers} = 1;
    }

    return $self->{writers};
}

=head2 directors

    Returns a list of the people that directed this episode.

    # in scalar context, returns a comma-delimited string
    my $directors = $episode->directors;

    # in array context, returns an array
    my @directors = $episode->directors;

=cut

sub directors {
    my $self = shift;

    unless (exists $self->{filled}->{directors}) {
        my ($directors) = $self->_html =~ m{
          <dl\s*>\s*
          <dt>Directors?:</dt>\s*
          (<dd>.*?</dd>)\s*
          </dl>
        }x;

        $self->{directors} = $self->_parse_people($directors);
        $self->{filled}->{directors} = 1;
    }

    return $self->{directors};
}

=head2 agent ($value)

Returns the current user agent setting, and sets to $value if provided.

=cut

sub agent {
    my $self = shift;   # may be called as $self or $class
    my $value = shift;

    if (ref $self) {
        if (defined $value) {    
            $self->{_agent} = $value;
        }
        return ($self->{_agent} || LWP::UserAgent::_agent);
    } else {
        return ($value || LWP::UserAgent::_agent);
    }
}

=head2 site ($value)

Returns the current mirror site setting, and sets to $value if provided.

Default site is "www"; other options include: us, uk, au

=cut

sub site {
    my $self = shift;  # may be called as $self or $class
    my $value = shift;

    if (ref $self) {
        if (defined $value) {
          if ($value =~ /^(au|uk|us|www|)$/i) {
            $self->{_site} = $value;
          } else {
            warn "Ignoring unknown site value: [$value]\n";
          }
        }
        return ($self->{_site} || 'www');
    } else {
        return ($value || 'www');
    }
}

=head2 url

    Returns the url that was used to create this object.

=cut

sub url {
    my $self = shift;

    return sprintf('http://%s.tv.com/episode/%d/summary.html', $self->site, $self->id);
}

=head2 season

    Returns an array of other episodes for the same season of this series.

=cut

sub season {
    my $self = shift;
    my @episodes = $self->series->episodes( season => $self->season_number );
    return wantarray ? @episodes : \@episodes;
}

=head2 series_id

    Returns the series ID for this episode.

=cut

sub series_id {
    my $self = shift;

    unless (exists $self->{filled}->{series_id}) {
        my ($id) = $self->_html =~ m{<a href=".*/show/(\d+)/cast\.html">};
        $self->{series_id} = $id;
        $self->{filled}->{series_id} = 1;
   }

    return $self->{series_id};
}

=head2 series

    Returns an L<WWW::TV::Series> object which is the complete series
    that this episode is a part of.

=cut

sub series {
    my $self = shift;

    unless (exists $self->{filled}->{series}) {
        if ($self->series_id) {
            require WWW::TV::Series;
            $self->{series} = WWW::TV::Series->new(id => $self->series_id);
            $self->{filled}->{series} = 1;
        } else {
            croak "Can't find series_id for this episode";
        }
    }

    return $self->{series};
}

sub _fill_vitals {
    my $self = shift;

    ($self->{season_number}, $self->{episode_number}, $self->{first_aired})
        = $self->_html
        =~ m{
            <ul\sclass="ep_stats">
              <li>.*?</li>
              <li><span>Season:</span>\s*(.*?)\s*</li>
              <li><span>Episode:</span>\s*(.*?)\s*</li>
              (?:<li><span>First\sAired:</span>\s*(?:\w*?\s*)?(\d+/\d+/\d+|n/a)\s*</li>)?
              (?:<li><span>Prod\sCode:</span>\s*.*\s*</li>)?
            </ul>
        }sx;

    $self->{filled}->{$_} = 1 for qw(episode_number season_number first_aired);

    return $self->_parse_first_aired;
}

sub _parse_first_aired {
    my $self = shift;

    if (not defined $self->{first_aired}) {
      $self->{first_aired} = 'n/a';
    }

    return if $self->{first_aired} eq 'n/a';

    my ($month, $day, $year) = split('/', $self->{first_aired});
    $self->{first_aired} = sprintf('%04d-%02d-%02d', $year, $month, $day);

    return 1;
}

sub _html {
    my $self = shift;

    unless ($self->{filled}->{html}) {
        my $ua = LWP::UserAgent->new( agent => $self->agent );
        my $rc = $ua->get($self->url);

        croak sprintf('Unable to fetch page for series %s', $self->id)
            unless $rc->is_success;
        $self->{html} =
            join(
                "\n",
                map { s/^\s*//; s/\s*$//; $_ }
                split /\n/, $rc->content
            );
        $self->{filled}->{html} = 1;
    }

    return $self->{html};
}

1;

__END__

=head1 SEE ALSO

L<WWW::TV::Series>

=head1 KNOWN ISSUES

There isn't yet any caching support. I don't see a need for it, but if you feel
the need to implement it then don't let me stop you.

There also isn't support for proxy servers yet. LWP should use it from your
environment if you really need it, but who still uses them anyway? Isn't it all
done transparently these days.

=head1 BUGS

Please report any bugs or feature requests through the web interface
at L<http://rt.cpan.org/Dist/Display.html?Queue=WWW-TV>.

=head1 AUTHORS

Danial Pearce C<cpan@tigris.id.au>

Stephen Steneker C<stennie@cpan.org>

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2006-2008 Danial Pearce C<cpan@tigris.id.au>. All rights reserved.

Some parts copyright 2007-2008 Stephen Steneker C<stennie@cpan.org>.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.