The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plagger::Util;
use strict;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of );

use Encode ();
use List::Util qw(min);
use HTML::Entities;
use MIME::Types;
use MIME::Type;

our $Detector;

BEGIN {
    if ( eval { require Encode::Detect::Detector; 1 } ) {
        $Detector = sub { Encode::Detect::Detector::detect($_[0]) };
    } else {
        require Encode::Guess;
        $Detector = sub {
            my @guess = qw(utf-8 euc-jp shift_jis); # xxx japanese only?
            eval { Encode::Guess::guess_encoding($_[0], @guess)->name };
        };
    }
}



sub strip_html {
    my $html = shift;
    $html =~ s/<[^>]*>//g;
    HTML::Entities::decode($html);
}

sub dumbnail {
    my($img, $p) = @_;

    if (!$img->{width} && !$img->{height}) {
        return '';
    }

    if ($img->{width} <= $p->{width} && $img->{height} <= $p->{height}) {
        return qq(width="$img->{width}" height="$img->{height}");
    }

    my $ratio_w = $p->{width}  / $img->{width};
    my $ratio_h = $p->{height} / $img->{height};
    my $ratio   = min($ratio_w, $ratio_h);

    sprintf qq(width="%d" height="%d"), ($img->{width} * $ratio), ($img->{height} * $ratio);
}

sub decode_content {
    my $stuff = shift;

    my $content;
    my $res;
    if (ref($stuff) && ref($stuff) eq 'URI::Fetch::Response') {
        $res     = $stuff;
        $content = $res->content;
    } elsif (ref($stuff)) {
        Plagger->context->error("Don't know how to decode " . ref($stuff));
    } else {
        $content = $stuff;
    }

    my $charset;

    # 1) if it is HTTP response, get charset from HTTP Content-Type header
    if ($res) {
        $charset = ($res->content_type =~ /charset=([\w\-]+)/)[0];
    }

    # 2) if there's not, try XML encoding
    $charset ||= ( $content =~ /<\?xml version="1.0" encoding="([\w\-]+)"\?>/ )[0];

    # 3) if there's not, try META tag
    $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0];

    # 4) if there's not still, try Detector/Guess
    $charset ||= $Detector->($content);

    # 5) falls back to UTF-8
    $charset ||= 'utf-8';

    my $decoded = eval { Encode::decode($charset, $content) };

    if ($@ && $@ =~ /Unknown encoding/) {
        Plagger->context->log(warn => $@);
        $charset = $Detector->($content) || 'utf-8';
        $decoded = Encode::decode($charset, $content);
    }

    $decoded;
}

sub extract_title {
    my $content = shift;
    my $title = ($content =~ m!<title>\s*(.*?)\s*</title>!is)[0] or return;
    HTML::Entities::decode($1);
}

sub load_uri {
    my($uri, $plugin) = @_;

    require Plagger::UserAgent;

    my $data;
    if (ref($uri) eq 'SCALAR') {
        $data = $$uri;
    }
    elsif ($uri->scheme =~ /^https?$/) {
        Plagger->context->log(debug => "Fetch remote file from $uri");

        my $response = Plagger::UserAgent->new->fetch($uri, $plugin);
        if ($response->is_error) {
            Plagger->context->log(error => "GET $uri failed: " .
                                  $response->http_status . " " .
                                  $response->http_response->message);
        }
        $data = decode_content($response);
    }
    elsif ($uri->scheme eq 'file') {
        Plagger->context->log(debug => "Open local file " . $uri->file);
        open my $fh, '<', $uri->file
            or Plagger->context->error( $uri->file . ": $!" );
        $data = decode_content(join '', <$fh>);
    }
    else {
        Plagger->context->error("Unsupported URI scheme: " . $uri->scheme);
    }

    return $data;
}

our $mimetypes = MIME::Types->new;
$mimetypes->addType( MIME::Type->new(type => 'video/x-flv', extensions => [ 'flv' ]) );
$mimetypes->addType( MIME::Type->new(type => 'audio/aac', extensions => [ 'm4a', '.aac' ]) );

sub mime_type_of {
    my $ext = shift;

    if (UNIVERSAL::isa($ext, 'URI')) {
        $ext = ( $ext->path =~ /\.(\w+)/ )[0];
    }

    return unless $ext;
    return $mimetypes->mimeTypeOf($ext);
}

1;