The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package TV::Anytime;
use strict;
use warnings;
use DateTime;
use DateTime::Format::ISO8601;
use DateTime::Format::Duration;
use File::Find::Rule;
use List::Util;
use Path::Class;
use TV::Anytime::Event;
use TV::Anytime::Genre;
use TV::Anytime::Group;
use TV::Anytime::Program;
use TV::Anytime::Service;
use XML::LibXML;
use XML::LibXML::XPathContext;
use base 'Class::Accessor::Chained::Fast';
__PACKAGE__->mk_accessors(qw(directory));
our $VERSION = '0.31';

sub new {
  my $class     = shift;
  my $directory = shift;
  die "$directory not a directory" unless -d $directory;

  die "$directory does not contain ServiceInformation.xml"
    unless -f file($directory, "ServiceInformation.xml");

  my $self = {};
  bless $self, $class;
  $self->directory($directory);
  return $self;
}

sub _find_files {
  my ($self, $id, $type) = @_;
  my @files =
    File::Find::Rule->file->name("*${id}_${type}.xml")->in($self->directory);
  return sort @files;
}

sub _programs {
  my ($self, $id) = @_;
  my @programs = $self->_program_information($id);
  my @events   = $self->_program_location($id);

  my %programs;
  $programs{ $_->id } = $_ foreach @programs;

  my %events;
  foreach my $event (@events) {
    $event->program($programs{ $event->crid });
    push @{ $events{ $event->crid } }, $event;
  }

  foreach my $program (@programs) {
    $program->events_ref($events{ $program->id });
  }

  return \@programs, \@events;
}

sub _program_information {
  my ($self, $id) = @_;
  my @programs;
  foreach my $file ($self->_find_files($id, "pi")) {
    push @programs, $self->_program_information_single($id, $file);
  }
  return @programs;
}

my %flags = (
  'AD' => 'is_audio_described',
  'S'  => 'is_subtitled',
  'SL' => 'is_deaf_signed',
);

sub _program_information_single {
  my ($self, $id, $filename) = @_;
  my $xpc = $self->_parse_file($filename);
  my @programs;
  foreach my $node ($xpc->findnodes("//tva:ProgramInformation")) {
    my $program = TV::Anytime::Program->new;
    $program->id($node->getAttribute('programId'));
    $program->title($xpc->findvalue(".//tva:Title",       $node));
    $program->synopsis($xpc->findvalue(".//tva:Synopsis[attribute::length='short']", $node));
    $program->synopsis_long($xpc->findvalue(".//tva:Synopsis[attribute::length='long']", $node));
    
    # clean up synopsis
    foreach my $s (qw(synopsis synopsis_long)) {
      my $synopsis = $program->$s;
      $synopsis =~s /^(CBeebies:?|CBBC|\[Ages? \d+-\d+\])\.? //;
      # fix title when title is Julian Fellowes Investigates...
      # and synopsis is ...a Most Mysterious Murder. The Case of etc.
      if ($synopsis =~ s/^\.\.\. ?//) {
        my $title = $program->title;
        $title =~ s/\.\.\.//;
        $synopsis =~ s/^(.+?)\. //;
        if ($1) {
        $title .= ' ' . $1;
        $title =~ s/ {2,}/ /;
        $program->title($title);
      }
        
      }
      $program->$s($synopsis);
    }
    
    # extract audio described / subtitled / deaf_signed from synopsis
    foreach my $s (qw(synopsis synopsis_long)) {
      my $synopsis = $program->$s;   
      next unless $synopsis =~ s/\[([A-Z,]+)\]//;
      my $flags = $1;
      foreach my $flag (split ",", $flags) {
        my $method = $flags{$flag} || next; # bad data
        $program->$method(1);
      }
      $program->$s($synopsis);
    }
    
    $program->caption_language(
      $xpc->findvalue(".//tva:CaptionLanguage", $node));
    $program->audio_channels($xpc->findvalue(".//tva:NumOfChannels", $node));
    $program->aspect_ratio($xpc->findvalue(".//tva:AspectRatio",     $node));

    my @member_of;
    foreach my $subnode ($self->_xpc($node)->findnodes(".//tva:MemberOf")) {
      push @member_of, $subnode->getAttribute('crid');
    }
    $program->member_of(\@member_of);

    my @genres;
    foreach my $subnode ($self->_xpc($node)->findnodes(".//tva:Genre")) {
      my $href = $subnode->getAttribute('href');
      $href =~ s/^urn:tva:metadata:cs:(.+?):.+$/$1/;
      push @genres,
        TV::Anytime::Genre->new(
        {
          name  => $href,
          value => $self->_xpc($subnode)->findvalue("./tva:Name"),
        }
        );
    }
    $program->genres_ref(\@genres);

    push @programs, $program;
  }
  return @programs;
}

sub _program_location {
  my ($self, $id) = @_;

  my @events;
  foreach my $file ($self->_find_files($id, "pl")) {
    push @events, $self->_program_location_single($id, $file);
  }
  return @events;
}

sub _program_location_single {
  my ($self, $id, $filename) = @_;
  my $xpc = $self->_parse_file($filename);
  my @events;
  foreach my $node ($xpc->findnodes("//tva:ScheduleEvent")) {
    my $nodexpc = $self->_xpc($node);
    my $event   = TV::Anytime::Event->new;
    $event->crid($nodexpc->findnodes("./tva:Program", $node)->get_node(0)
        ->getAttribute('crid'));
    $event->start(
      $self->_parse_date($nodexpc->findvalue('./tva:PublishedStartTime')));
    my $duration =
      $self->_parse_duration($nodexpc->findvalue('./tva:PublishedDuration'));
    $event->stop($event->start + $duration);

#    warn $event->crid . ": " . $event->start->datetime . " -> " . $event->stop->datetime . "\n" if $event->start->datetime =~ /2005-08-.?.?T07:00/;
# eq 'crid://bbc.co.uk/277092412'
#or $event->crid eq 'crid://bbc.co.uk/277092882';
    push @events, $event;
  }
  return @events;
}

sub groups {
  my $self = shift;
  my @services;
  my $xpc = $self->_parse_file("groups_cr.xml");
  my ($members, $parents);
  foreach my $node ($xpc->findnodes("//cr:Result")) {
    my $id      = $node->getAttribute("CRID");
    my @members =
      map { $_->textContent } $self->_xpc($node)->findnodes(".//cr:Crid");
    $members->{$id} = \@members;
    push @{ $parents->{$_} }, $id foreach @members;
  }
  $xpc = $self->_parse_file("groups_gr.xml");
  my @groups;
  foreach my $node ($xpc->findnodes("//tva:GroupInformation")) {
    my $id      = $node->getAttribute("groupId");
    my $members = $members->{$id};
    next unless $members;
    push @groups,
      TV::Anytime::Group->new(
      {
        id   => $id,
        type => $self->_xpc($node)->findnodes("./tva:GroupType")->[0]
          ->getAttribute("value"),
        title       => $self->_xpc($node)->findvalue(".//tva:Title"),
        members_ref => $members,
        parents_ref => $parents->{$id},
      }
      );
  }
  return @groups;
}

sub services {
  my $self = shift;
  my @services;
  my $xpc = $self->_parse_file("ServiceInformation.xml");
  foreach my $node ($xpc->findnodes("//tva:ServiceInformation")) {

    my @genres;
    foreach my $subnode ($self->_xpc($node)->findnodes("./tva:ServiceGenre")) {
      my $href = $subnode->getAttribute('href');
      $href =~ s/^urn:tva:metadata:cs:(.+?):.+$/$1/;
      push @genres,
        TV::Anytime::Genre->new(
        {
          name  => $href,
          value => $self->_xpc($subnode)->findvalue("./tva:Name"),
        }
        );
    }
    push @services,
      TV::Anytime::Service->new(
      {
        anytime    => $self,
        id         => $node->getAttribute('serviceId'),
        name       => $xpc->findvalue("./tva:Name", $node),
        owner      => $xpc->findvalue("./tva:Owner", $node),
        logo       => $xpc->findvalue("./tva:Logo", $node),
        genres_ref => \@genres,
      }
      );
  }
  return @services;
}

sub services_television {
  my $self = shift;
  return grep { $_->is_television } $self->services;
}

sub services_radio {
  my $self = shift;
  return grep { $_->is_radio } $self->services;
}

sub _parse_file {
  my ($self, $filename) = @_;
  my $directory = $self->directory;
  my $path      = $filename;
  $path = dir($self->directory, $filename) unless $filename =~ /$directory/;
  my $parser = XML::LibXML->new;
  my $doc    = $parser->parse_file($path);
  return $self->_xpc($doc);
}

sub _xpc {
  my ($self, $node) = @_;
  my $xpc = XML::LibXML::XPathContext->new($node);
  $xpc->registerNs('tva', 'urn:tva:metadata:2002');
  $xpc->registerNs('rss', 'http://purl.org/rss/1.0/');
  $xpc->registerNs('cr',
    'http://www.tv-anytime.org/2002/02/ContentReferencing');
  return $xpc;
}

sub _parse_date {
  my ($self, $string) = @_;
  my $dt = DateTime::Format::ISO8601->parse_datetime($string);
  return $dt;
}

sub _parse_duration {
  my ($self, $string) = @_;
  my $d = DateTime::Format::Duration->new(pattern => 'PT%HH%MM%SS',);
  return $d->parse_duration($string);
}

1;

__END__

=head1 NAME

TV::Anytime - Parse TV-AnyTime bundles of TV and Radio listings

=head1 SYNOPSIS

  use TV::Anytime;
  my $tv = TV::Anytime->new("data/20050701/");

  # Find out what services are available
  my @services = $tv->services;
  my @radio_services = $tv->services_radio;
  my @tv_services = $tv->services_television;
  my @groups = $tv->groups;

=head1 DESCRIPTION

The L<TV::Anytime> module parses TV-Anytime bundles. TV-Anytime is a format organised
by the TV-Anytime Forum (L<http://www.tv-anytime.org/>). These are open
standards (see ETSI TS102822) for the rich description of Radio,
Television and other types of media. The metadata specification includes
a comprehensive genre scheme, methods of linking and grouping
programmes, listing credits and lots of other data fields.

This module is concerned with parsing TV-Anytime files that are shipped
by the British Broadcasting Corporation from
L<http://backstage.bbc.co.uk/feeds/tvradio/doc.html>. It is assumed that
you have downloaded a .tar.gz from this site and have unpacked it.

=head1 METHODS

=head2 new()

The new() method is the constructor. It takes the directory into which
you have unpacked the TV-Anytime files:

  my $tv = TV::Anytime->new("data/20050701/");

=head2 groups

The groups() method returns a list of all the available groups as a
list of L<TV::Anytime::Group> objects:

  my @groups = $tv->groups;

=head2 services

The services() method returns a list of all the available services as a
list of L<TV::Anytime::Service> objects:

  my @services = $tv->services;
  
=head2 services_radio

The services_radio() method returns a list of the available radio
services as a list of L<TV::Anytime::Service> objects:

  my @radio_services = $tv->services_radio;

=head2 services_television

The serviices_television() method returns a list of all the available
television services as a list of L<TV::Anytime::Service> objects:

  my @tv_services = $tv->services_television;

=head1 SEE ALSO 

L<TV::Anytime::Service>

=head1 BUGS                                                   
                                                                                
Please report any bugs or feature requests to                                   
C<bug-TV-Anytime@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org>.  

=head1 AUTHOR

Leon Brocard C<acme@astray.com>

=head1 LICENCE AND COPYRIGHT                                                    
                                                                                
Copyright (c) 2005, Leon Brocard C<acme@astray.com>. All rights reserved.
                                                                                
This module is free software; you can redistribute it and/or                    
modify it under the same terms as Perl itself.