The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
# $File: //member/autrijus/XML-RSS-Aggregate/lib/XML/RSS/Aggregate.pm $ $Author: autrijus $
# $Revision: #4 $ $Change: 2924 $ $DateTime: 2002/12/25 15:04:33 $

package XML::RSS::Aggregate;
$XML::RSS::Aggregate::VERSION = '0.02';

use strict;
use XML::RSS;
use base 'XML::RSS';

use Date::Parse;
use LWP::Simple 'get';
use HTML::Entities 'encode_entities';

=head1 NAME

XML::RSS::Aggregate - RSS Aggregator

=head1 SYNOPSIS

    my $rss = XML::RSS::Aggregate->new(
        # parameters for XML::RSS->channel()
        title   => 'Aggregated Examples',
        link    => 'http://blog.elixus.org/',

        # parameters for XML::RSS::Aggregate->aggregate()
        sources => [ qw(
            http://one.example.com/index.rdf
            http://another.example.com/index.rdf
            http://etc.example.com/index.rdf
        ) ],
        sort_by => sub {
            $_[0]->{dc}{subject}    # default to sort by dc:date
        },
        uniq_by => sub {
            $_[0]->{title}          # default to uniq by link
        }
    );

    $rss->aggregate( sources => [ ... ] );  # more items
    $rss->save("all.rdf");

=head1 DESCRIPTION

This module implements a subclass of B<XML::RSS>, adding a single
C<aggregate> method that fetches other RSS feeds and add to the object
itself.  It handles the proper ordering and duplication removal for
aggregated links.

Also, the constructor C<new> is modified to take arguments to pass
implicitly to C<channel> and C<aggregate> methods.

All the base methods are still applicable to this module; please see
L<XML::RSS> for details.

=head1 METHODS

=over 4

=item aggregate (sources=>\@url, sort_by=>\&func, uniq_by=>\&func)

This method fetches all RSS feeds listed in C<@url> and pass their
items to the object's C<add_item>.

The optional C<sort_by> argument specifies the function to use for
ordering RSS items; it defaults to sort them by their C<{dc}{date}>
attribute (converted to absolute timestamps), with ties broken by
their C<{link}> attribute.

The optional C<uniq_by> argument specifies the function to use for
removing duplicate RSS items; it defaults to remove items that has
the same C<{link}> value.

=back

=cut

sub new {
    my ($class, %args) = @_;

    my $version = delete($args{version}) || '1.0';
    my $self    = $class->SUPER::new( version => $version );

    my $sources = delete($args{sources});
    my $sort_by = delete($args{sort_by});

    $self->channel(%args) if %args;
    $self->aggregate(
        sources => $sources,
        sort_by => $sort_by,
    ) if $sources;

    return $self;
}

sub aggregate {
    my ($self, %args) = @_;

    my $sources = $args{sources} or return;
    my $sort_by = $args{sort_by} || sub {
        my $date = $_[0]->{dc}{date};
        $date =~ s/:(\d\d)$/$1/ if $date;
        sprintf("%20s", str2time($date)).$_[0]->{link}
    };
    my $uniq_by = $args{uniq_by} || sub {
        $_[0]->{link}
    };

    my $old_items = $self->{items} || [];
    $self->{items} = [];

    my %saw;
    $self->add_item(%{$_->[0]}) for
        sort { $b->[1] cmp $a->[1] }
        grep { $_->[1] }
        map  { [ $_ => scalar($sort_by->($_)) ] }
        grep { !$saw{$uniq_by->($_)}++ } @{$old_items},
        map  { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_}; $_ }
        map  { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{dc}}; $_ }
        map  { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{syn}}; $_ }
        map  { encode_entities($_, '&<>') for grep {!ref($_)} @{$_->{taxo}}; $_ }
        map  { eval { (my $rss = XML::RSS->new)->parse(get($_)); @{$rss->{items}} } }
        grep { /^\w+:/ } @{$sources};

    return $self;
}

1;

=head1 SEE ALSO

L<XML::RSS>

=head1 AUTHORS

Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>

=head1 COPYRIGHT

Copyright 2002 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut

__END__
# Local variables:
# c-indentation-style: bsd
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
# vim: expandtab shiftwidth=4: