The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
use strict;
use utf8;
use DateTime;
use DateTime::Format::W3CDTF;
use Encode;
use LWP::Simple ();
use YAML;

my @url = ("http://www.wowow.co.jp/hivision/list_hv.html",
	   "http://www.wowow.co.jp/hivision/list_51.html");

my %seen;
my @programs = grep { !$seen{"$_->{channel}|$_->{date}"}++ }
    sort { $a->{date} cmp $b->{date} }
    map fetch_program($_), @url;

binmode STDOUT, ":utf8";
print YAML::Dump +{
    title => 'WOWOW HV / 5.1ch programs',
    link  => "http://www.wowow.co.jp/hivision/indexh.html",
    entry => [
        map {
            my @tags = ($_->{channel});
            push @tags, 'HV'    if $_->{hivision};
            push @tags, '5.1ch' if $_->{51};
            +{ title => $_->{title},
               date  => $_->{date},
               tags  => \@tags,
               link  => $_->{link} }
        } @programs,
    ],
};

sub fetch_program {
    my $url = shift;
    my $html = LWP::Simple::get($url);
    $html = decode("shift_jis", $html);
    $html =~ tr/\r//d;
    my $re = <<'RE';
<tr bgcolor="#(?:CCCCCC|FFFFCC)"> 
  <td width="385"><span class="t12"><a href="(http://www\.wowow\.co\.jp/schedule/ghtml/.*?\.html)" target="_blank">(.*?)</a></span></td>
  <td width="45" nowrap><span class="t12">(\d+ch)</span></td>
  <td width="65" nowrap>(<img src="http://www\.wowow\.co\.jp/hivision/img/n?mark_15\.gif">)?(<img src="http://www\.wowow\.co\.jp/hivision/img/mark_51\.gif">)?</td>
  <td width="150" nowrap><span class="t12">(.*?)</span></td>
</tr>
RE
    ;
    my @program;
    while ($html =~ /$re/g) {
	my %data;
	@data{qw(link title channel hivision 51 date)} = ($1, $2, $3, $4, $5, $6);
	$data{hivision} = $data{hivision} !~ /nmark/;
        $data{date} = munge_datetime($data{date});
        push @program, \%data;
    }
    return @program;
}

sub munge_datetime {
    my $date = shift;

    # date: 2006年10月28日午後0:00~ JST
    $date =~ /^(\d{4})年(\d{1,2})月(\d{1,2})日(午前|午後|深夜)(\d{1,2}):(\d{2})/
        or die "No match: $date";
    my($year, $month, $day, $am_pm_midnight, $hour, $minute) = ($1, $2, $3, $4, $5, $6);
    $hour += 12 if $am_pm_midnight eq '午後';

    my $dt = DateTime->new(
        year  => $year,
        month => $month,
        day   => $day,
        hour  => $hour,
        minute => $minute,
        time_zone => 'Asia/Tokyo',
    );
    $dt->add( days => 1 ) if $am_pm_midnight eq '深夜';

    return DateTime::Format::W3CDTF->format_datetime($dt);
}