The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plagger::Plugin::Filter::TruePermalink;
use strict;
use base qw( Plagger::Plugin );

use DirHandle;
use YAML;
use Plagger::UserAgent;
use URI;
use URI::QueryParam;

sub init {
    my $self = shift;
    $self->SUPER::init(@_);
    $self->conf->{follow_redirect} = 1 unless exists $self->conf->{follow_redirect};
    $self->load_plugins;
}

sub load_plugins {
    my $self = shift;

    $self->load_assets(
        '*.yaml',
        sub {
            my $file = shift;
            Plagger->context->log(debug => "loading $file");
            my $data = YAML::LoadFile($file);
            if (ref($data) eq 'ARRAY') {
                # redirectors.yaml ... make it backward compatible to ignore
            } else {
                push @{$self->{plugins}}, $data;
            }
        },
    );
}

sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&update,
    );
}

sub update {
    my($self, $context, $args) = @_;

    $self->rewrite(sub { $args->{entry}->permalink }, sub { $args->{entry}->permalink(@_) }, $args);
    for my $enclosure ($args->{entry}->enclosures) {
        $self->rewrite(sub { $enclosure->url }, sub { $enclosure->url( URI->new(@_) ) }, $args);
    }
}

sub rewrite {
    my($self, $getter, $callback, $args) = @_;

    my $loop;
    while ($self->rewrite_link($getter, $callback, $args)) {
        if ($loop++ >= 10) {
            $self->log(error => "Possible infinite loop on " . $getter->());
            return;
        }
    }
}

sub rewrite_link {
    my($self, $getter, $callback, $args) = @_;

    my $context = Plagger->context;

    my $link = $getter->();
    my $orig = $link; # copy
    my $count = 0;
    my $rewritten;

    for my $plugin (@{ $self->{plugins}}) {
        my $match = $plugin->{match} || '.'; # anything
        next unless $link =~ m/$match/i;

        if ($plugin->{rewrite}) {
            local $_ = $link;
            my $done = eval $plugin->{rewrite};
            if ($@) {
                $context->error("$@ in $plugin->{rewrite}");
            } elsif ($done) {
                $count += $done;
                $rewritten = $_;
                last;
            }
        } elsif ($plugin->{query_param}) {
            my $param = URI->new($link)->query_param($plugin->{query_param})
                or $context->error("No query param $plugin->{query_param} in " . $link);
            $count++;
            $rewritten = $param;
            last;
        }
    }

    # No match to known sites. Try redirect by issuing GET
    if (!$count && $self->conf->{follow_redirect}) {
        my $url = $self->follow_redirect($link);
        if ($url && $url ne $link) {
            $count++;
            $rewritten = $url;
        }
    }

    if ($count) {
        $callback->($rewritten);
        $context->log(info => "Link $orig rewritten to $rewritten");
    }

    return $count;
}

sub follow_redirect {
    my($self, $link) = @_;

    my $url = $self->cache->get_callback(
        "redirector:$link",
        sub {
            Plagger->context->log(debug => "Issuing GET to $link to follow redirects");
            my $ua  = Plagger::UserAgent->new;

            # don't care about content body ... immediately die
            my $res = $ua->simple_request( HTTP::Request->new(GET => $link), sub { die } );
            if ($res->is_redirect) {
                return $res->header('Location');
            }
            return '';
        },
        '1 day',
    );

    Plagger->context->log(debug => "Resolved redirection of $link => $url") if $url;

    return $url;
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::TruePermalink - Normalize permalink using its own plugin files

=head1 SYNOPSIS

  - module: Filter::TruePermalink

=head1 DESCRIPTION

This plugin normalizes permalink using YAML based URL pattern
files.

This plugin rewrites I<permalink> attribute of C<$entry>, while
keeping I<link> as is. If C<$entry> has enclosures, this plugin also
tries to rewrite url of them.

=head1 CONFIG

=over 4

=item follow_redirect

If set to 1, this plugin issues GET request to entry permalinks to see
if the server returns 301 or 302 redirect to other URL. Defaults to 1.

=back

=head1 PATTERN FILES

You can write your own pattern file using YAML data format. Usable keys are:

=over 4

=item author

Your name. (Optional)

=item match

Regular expression rule to match with entry's link. Rewrites only
happen when the URL form matches. You can omit this configuration to
apply the rewrite rule to any URLs.

=item rewrite

Replacement regexp to filter permalink. Permalink is stored in C<$_> variable so that you can write:

  rewrite: s/;jsession_id=\w+//

=item query_param

URL query parameter to extract normalized permalink.

  query_param: destination

=back

See C<assets/plugins/Filter-TruePermalink> for more examples.

=head1 AUTHOR

youpy

Tatsuhiko Miyagawa

=head1 SEE ALSO

L<Plagger>

=cut