lib/Perlanet.pm - metacpan.org

package Perlanet;

use strict;
use warnings;

use Moose;
use namespace::autoclean;

use Carp;
use DateTime::Duration;
use DateTime;
use Perlanet::Entry;
use Perlanet::Feed;
use Try::Tiny;
use URI::Fetch;
use XML::Feed;

use vars qw{$VERSION};

BEGIN {
  $VERSION = '1.1.1';
}

with 'MooseX::Traits';

$XML::Atom::ForceUnicode = 1;

has 'ua' => (
  is         => 'rw',
  isa        => 'LWP::UserAgent',
  lazy_build => 1
);

sub _build_ua {
  my $self = shift;
  my $ua = LWP::UserAgent->new(
    agent => "Perlanet/$VERSION"
  );
  $ua->show_progress(1) if -t STDOUT;
  $ua->env_proxy;

  return $ua;
}

has 'cutoff' => (
  isa     => 'DateTime',
  is      => 'ro',
  default => sub {
    DateTime->now + DateTime::Duration->new(weeks => 1);
  }
);

has 'entries' => (
  isa => 'Int',
  is  => 'rw',
  default => 10,
);

has 'entries_per_feed' => (
  isa => 'Int',
  is  => 'rw',
  default => 5,
);

has 'feeds' => (
  isa     => 'ArrayRef',
  is      => 'ro',
  default => sub { [] }
);

has 'author' => (
  isa     => 'HashRef',
  is      => 'ro',
);

has $_ => (
  isa => 'Str',
  is  => 'ro',
) for qw( self_link title description url agent );

has entry_sort_order => (
  isa => 'Str',
  is  => 'ro',
  default => 'modified',
);

=head1 NAME

Perlanet - A program for creating programs that aggregate web feeds (both
RSS and Atom).

=head1 SYNOPSIS

  my $perlanet = Perlanet->new;
  $perlanet->run;

=head1 DESCRIPTION

Perlanet is a program for creating programs that aggregate web feeds (both
RSS and Atom). Web pages like this are often called "Planets" after the Python
software which originally popularised them. Perlanet is a planet builder
written in Perl - hence "Perlanet".

You are probably interested in L<Perlanet::Simple> to get started straight
out of the box, batteries included style.

Perlanet itself is the driving force behind everything, however. Perlanet
reads a series of web feeds (filtering only those that are valid), sorts
and selects entries from these web feeds, and then creates a new aggregate
feed and renders this aggregate feed. Perlanet allows the user to customize
all of these steps through subclassing and roles.

For most uses, you probably don't want to use the Perlanet module. The
L<perlanet> command line program is far more likely to be useful.

=head1 CONSTRUCTOR

=head2 new

  my $perlanet = Perlanet->new

The constructor method. Can be passed a hashref of initialisers.

See L</ATTRIBUTES> below for details of the key/value pairs to pass in.

=head1 ATTRIBUTES

=over

=item ua

An instance of L<LWP::UserAgent>. Defaults to a simple agent using C<<
$cfg->{agent} >> as the user agent name, or C< Perlanet/$VERSION >.

=item cutoff

An instance of L<DateTime> which represents the earliest date for
which feed posts will be fetched/shown.

=item feeds

An arrayref of L<Perlanet::Feed> objects representing the feeds to
collect data from.

=back

=head1 METHODS

=head2 fetch_page

Attempt to fetch a web page and a returns a L<URI::Fetch::Response> object.

=cut

sub fetch_page {
  my ($self, $url) = @_;
  return URI::Fetch->fetch(
    $url,
    UserAgent     => $self->ua,
    ForceResponse => 1,
  );
}

=head2 fetch_feeds

Called internally by L</run> and passed the list of feeds in L</feeds>.

Attempt to download all given feeds, as specified in the C<feeds> attribute.
Returns a list of L<Perlanet::Feed> objects, with the actual feed data
loaded.

NB: This method also modifies the contents of L</feeds>.

=cut

sub fetch_feeds {
  my ($self, $feeds) = @_;

  my @valid_feeds;
  for my $feed (@$feeds) {
    my $response = $self->fetch_page($feed->url);

    if ($response->is_error) {
      carp 'Error retrieving ' . $feed->url;
      carp $response->http_response->status_line;
      next;
    }

    unless (length $response->content) {
      carp 'No data returned from ' . $feed->url;
      next;
    }

    try {
      my $data = $response->content;
      my $xml_feed = XML::Feed->parse(\$data);

      $feed->_xml_feed($xml_feed);
      $feed->title($xml_feed->title) unless $feed->title;

      push @valid_feeds, $feed;
    }
    catch {
      carp 'Errors parsing ' . $feed->url;
      carp $_ if defined $_;
    };
  }

  return \@valid_feeds;
}

=head2 select_entries

Called internally by L</run> and passed the list of feeds from
L</fetch_feeds>.

Returns a combined list of L<Perlanet::Entry> objects from all given feeds.

=cut

sub select_entries {
  my ($self, $feeds) = @_;

  my @feed_entries;
  for my $feed (@$feeds) {
    my @entries = $feed->_xml_feed->entries;

    @entries = @{ $self->sort_entries(\@entries) };

    if ($self->entries_per_feed and @entries > $self->entries_per_feed) {
      $#entries = $self->entries_per_feed - 1;
    }

    push @feed_entries,
      map {
        $_->title($feed->title . ': ' . $_->title);

        # Problem with XML::Feed's conversion of RSS to Atom
        if ($_->issued && ! $_->modified) {
          $_->modified($_->issued);
        }

        Perlanet::Entry->new(
          _entry => $_,
          feed => $feed
        );
      } @entries;
  }

  return \@feed_entries;
}

=head2 sort_entries

Called internally by L</run> and passed the list of entries from
L</select_entries>.

Sort the given list of entries into created/modified order for aggregation,
and filters them if necessary.

Takes a list of L<Perlanet::Entry>s, and returns an ordered list.

=cut

sub sort_entries {
  my ($self, $entries) = @_;
  my $day_zero = DateTime->from_epoch(epoch => 0);

  my @entries;

  if ($self->entry_sort_order eq 'modified') {
    @entries = grep {
      ($_->modified || $_->issued || $day_zero) < $self->cutoff
    } sort {
      ($b->modified || $b->issued || $day_zero)
          <=>
      ($a->modified || $a->issued || $day_zero)
    } @$entries;
  } elsif ($self->entry_sort_order eq 'issued') {
    @entries = grep {
      ($_->issued || $_->modified || $day_zero) < $self->cutoff
    } sort {
      ($b->issued || $b->modified || $day_zero)
          <=>
      ($a->issued || $a->modified || $day_zero)
    } @$entries;
  } else {
    die 'Invalid entry sort order: ' . $self->entry_sort_order;
  }

  # Only need so many entries
  if ($self->entries && @entries > $self->entries) {
    $#entries = $self->entries - 1;
  }

  return \@entries;
}

=head2 build_feed

Called internally by L</run> and passed the list of entries from
L</sort_entries>.

Takes a list of L<Perlanet::Entry>s, and returns a L<Perlanet::Feed>
that is the actual feed for the planet.

=cut

sub build_feed {
  my ($self, $entries) = @_;

  my $self_url = $self->self_link;

  my $f = Perlanet::Feed->new( modified    => DateTime->now );
  $f->title($self->title)             if defined $self->title;
  $f->url($self->url)                 if defined $self->url;
  $f->description($self->description) if defined $self->description;
  if (defined $self->{'author'} ) {
    $f->author($self->author->{name})   if defined $self->author->{name};
    $f->email($self->author->{email})   if defined $self->author->{email};
  }
  $f->self_link($self->url)           if defined $self->url;
  $f->id($self->url)                  if defined $self->url;

  $f->add_entry($_) for @$entries;

  return $f;
}

=head2 clean_html

Clean a HTML string so it is suitable for display.

Takes a HTML string and returns a "cleaned" HTML string.

=cut

sub clean_html {
  my ($self, $entry) = @_;
  return $entry;
}

=head2 clean_entries

Clean all entries for the planet.

Takes a list of entries, runs them through C<clean> and returns a list of
cleaned entries.

=cut

sub clean_entries {
  my ($self, $entries) = @_;

  my @clean_entries;

  foreach (@$entries) {
    if (my $body = $_->content->body) {
      my $cleaned = $self->clean_html($body);
      $_->content->body($cleaned);
    }

    if (my $summary = $_->summary->body) {
      my $cleaned = $self->clean_html($summary);
      $_->summary->body($cleaned);
    }

    push @clean_entries, $_;
  }

  return \@clean_entries;
}

=head2 render

Called internally by L</run> and passed the feed from L</build_feed>.

This is the hook where you generate some type of page to display the result
of aggregating feeds together (ie, inserting the posts into a database,
running a HTML templating library, etc)

Takes a L<Perlanet::Feed> as input (as generated by L<build_feed>.

=cut

sub render {
  my ($self, $feed) = @_;
}

=head2 run

The main method which runs the perlanet process.

=cut

sub run {
  my $self = shift;

  my $feeds    = $self->fetch_feeds($self->feeds);
  my $selected = $self->select_entries($feeds);
  my $sorted   = $self->sort_entries($selected);
  my $cleaned  = $self->clean_entries($sorted);
  my $feed     = $self->build_feed($cleaned);

  $self->render($feed);
}

=head1 TO DO

See L<http://wiki.github.com/davorg/perlanet/issues>

=head1 SUPPORT

There is a mailing list which acts as both a place for developers to talk
about maintaining and improving Perlanet and also for users to get support.
You can sign up to this list at
L<http://lists.mag-sol.com/mailman/listinfo/perlanet>

To report bugs in Perlanet, please use the CPAN request tracker. You can
either use the web page at
L<http://rt.cpan.org/Public/Bug/Report.html?Queue=Perlanet> or send an email
to bug-Perlanet@rt.cpan.org.

=head1 SEE ALSO

=over 4

=item *

L<perlanet>

=item *

L<Plagger>

=back

=head1 AUTHOR

Dave Cross, <dave@mag-sol.com>

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2010 by Magnum Solutions Ltd.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.0 or,
at your option, any later version of Perl 5 you may have available.

=cut

1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)