The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::YouTube::Download;

use strict;
use warnings;
use 5.008001;

our $VERSION = '0.58';

use Carp qw(croak);
use URI ();
use LWP::UserAgent;
use JSON;
use HTML::Entities qw/decode_entities/;
use HTTP::Request;

$Carp::Internal{ (__PACKAGE__) }++;

use constant DEFAULT_FMT => 18;

my $base_url = 'http://www.youtube.com/watch?v=';

sub new {
    my $class = shift;
    my %args = @_;
    $args{ua} = LWP::UserAgent->new(
        agent      => __PACKAGE__.'/'.$VERSION,
        parse_head => 0,
    ) unless exists $args{ua};
    bless \%args, $class;
}

for my $name (qw[video_id video_url title user fmt fmt_list suffix]) {
    no strict 'refs';
    *{"get_$name"} = sub {
        use strict 'refs';
        my ($self, $video_id) = @_;
        croak "Usage: $self->get_$name(\$video_id|\$watch_url)" unless $video_id;
        my $data = $self->prepare_download($video_id);
        return $data->{$name};
    };
}

sub playback_url {
    my ($self, $video_id, $args) = @_;
    croak "Usage: $self->playback_url('[video_id|video_url]')" unless $video_id;
    $args ||= {};

    my $data = $self->prepare_download($video_id);
    my $fmt  = $args->{fmt} || $data->{fmt} || DEFAULT_FMT;
    my $video_url = $data->{video_url_map}{$fmt}{url} || croak "this video does not offer format (fmt) $fmt";

    return $video_url;
}

sub download {
    my ($self, $video_id, $args) = @_;
    croak "Usage: $self->download('[video_id|video_url]')" unless $video_id;
    $args ||= {};

    my $data = $self->prepare_download($video_id);

    my $fmt = $args->{fmt} || $data->{fmt} || DEFAULT_FMT;

    my $video_url = $data->{video_url_map}{$fmt}{url} || croak "this video has not supported fmt: $fmt";
    $args->{filename} ||= $args->{file_name};
    my $filename = $self->_format_filename($args->{filename}, {
        video_id   => $data->{video_id},
        title      => $data->{title},
        user       => $data->{user},
        fmt        => $fmt,
        suffix     => $data->{video_url_map}{$fmt}{suffix} || _suffix($fmt),
        resolution => $data->{video_url_map}{$fmt}{resolution} || '0x0',
    });

    $args->{cb} = $self->_default_cb({
        filename  => $filename,
        verbose   => $args->{verbose},
        overwrite => defined $args->{overwrite} ? $args->{overwrite} : 1,
    }) unless ref $args->{cb} eq 'CODE';

    my $res = $self->ua->get($video_url, ':content_cb' => $args->{cb});
    croak "!! $video_id download failed: ", $res->status_line if $res->is_error;
}

sub _format_filename {
    my ($self, $filename, $data) = @_;
    return "$data->{video_id}.$data->{suffix}" unless defined $filename;
    $filename =~ s#{([^}]+)}#$data->{$1} || "{$1}"#eg;
    return $filename;
}

sub _is_supported_fmt {
    my ($self, $video_id, $fmt) = @_;
    my $data = $self->prepare_download($video_id);
    defined($data->{video_url_map}{$fmt}{url}) ? 1 : 0;
}

sub _default_cb {
    my ($self, $args) = @_;
    my ($file, $verbose, $overwrite) = @$args{qw/filename verbose overwrite/};

    croak "file exists! $file" if -f $file and !$overwrite;
    open my $wfh, '>', $file or croak $file, " $!";
    binmode $wfh;

    print "Downloading `$file`\n" if $verbose;
    return sub {
        my ($chunk, $res, $proto) = @_;
        print $wfh $chunk; # write file

        if ($verbose || $self->{verbose}) {
            my $size = tell $wfh;
            my $total = $res->header('Content-Length');
            printf "%d/%d (%.2f%%)\r", $size, $total, $size / $total * 100;
            print "\n" if $total == $size;
        }
    };
}

sub prepare_download {
    my ($self, $video_id) = @_;
    croak "Usage: $self->prepare_download('[video_id|watch_url]')" unless $video_id;
    $video_id = $self->video_id($video_id);

    return $self->{cache}{$video_id} if ref $self->{cache}{$video_id} eq 'HASH';

    my $content       = $self->_get_content($video_id);
    my $title         = $self->_fetch_title($content);
    my $user          = $self->_fetch_user($content);
    my $video_url_map = $self->_fetch_video_url_map($content);

    my $fmt_list = [];
    my $sorted = [
        map {
            push @$fmt_list, $_->[0]->{fmt};
            $_->[0]
        } sort {
            $b->[1] <=> $a->[1]
        } map {
            my $resolution = $_->{resolution};
            $resolution =~ s/(\d+)x(\d+)/$1 * $2/e;
            [ $_, $resolution ]
        } values %$video_url_map,
    ];

    my $hq_data = $sorted->[0];

    return $self->{cache}{$video_id} = {
        video_id      => $video_id,
        video_url     => $hq_data->{url},
        title         => $title,
        user          => $user,
        video_url_map => $video_url_map,
        fmt           => $hq_data->{fmt},
        fmt_list      => $fmt_list,
        suffix        => $hq_data->{suffix},
        resolution    => $hq_data->{resolution},
    };
}

sub _fetch_title {
    my ($self, $content) = @_;

    my ($title) = $content =~ /<meta name="title" content="(.+?)">/ or return;
    return decode_entities($title);
}

sub _fetch_user {
    my ($self, $content) = @_;

    if( $content =~ /<span class="yt-user-name [^>]+>([^<]+)<\/span>/ ){
        return decode_entities($1);
    }elsif( $content =~ /","author":"([^"]+)","/ ){
        return decode_entities($1);
    }else{
        return;
    }	
}

sub _fetch_video_url_map {
    my ($self, $content) = @_;

    my $args = $self->_get_args($content);
    unless ($args->{fmt_list} and $args->{url_encoded_fmt_stream_map}) {
        croak 'failed to find video urls';
    }

    my $fmt_map     = _parse_fmt_map($args->{fmt_list});
    my $fmt_url_map = _parse_stream_map($args->{url_encoded_fmt_stream_map});

    my $video_url_map = +{
        map {
            $_->{fmt} => $_,
        } map +{
            fmt        => $_,
            resolution => $fmt_map->{$_},
            url        => $fmt_url_map->{$_},
            suffix     => _suffix($_),
        }, keys %$fmt_map
    };

    return $video_url_map;
}

sub _get_content {
    my ($self, $video_id) = @_;

    my $url = "$base_url$video_id";

    my $req = HTTP::Request->new;
    $req->method('GET');
    $req->uri($url);
    $req->header('Accept-Language' => 'en-US');

    my $res = $self->ua->request($req);
    croak "GET $url failed. status: ", $res->status_line if $res->is_error;

    return $res->content;
}

sub _get_args {
    my ($self, $content) = @_;

    my $data;
    for my $line (split "\n", $content) {
        next unless $line;
        if ($line =~ /the uploader has not made this video available in your country/i) {
            croak 'Video not available in your country';
        }
        elsif ($line =~ /^.+ytplayer\.config\s*=\s*({.*})/) {
            ($data, undef) = JSON->new->utf8(1)->decode_prefix($1);
            last;
        }
    }

    croak 'failed to extract JSON data' unless $data->{args};

    return $data->{args};
}

sub _parse_fmt_map {
    my $param = shift;
    my $fmt_map = {};
    for my $stuff (split ',', $param) {
        my ($fmt, $resolution) = split '/', $stuff;
        $fmt_map->{$fmt} = $resolution;
    }

    return $fmt_map;
}

sub _sigdecode {
    my @s = @_;

    # based on youtube_dl/extractor/youtube.py from yt-dl.org
    if (@s == 93) {
        return (reverse(@s[30..86]), $s[88], reverse(@s[6..28]));
    } elsif (@s == 92) {
        return ($s[25], @s[3..24], $s[0], @s[26..41], $s[79], @s[43..78], $s[91], @s[80..82]);
    } elsif (@s == 91) {
        return (reverse(@s[28..84]), $s[86], reverse(@s[6..26]));
    } elsif (@s == 90) {
        return ($s[25], @s[3..24], $s[2], @s[26..39], $s[77], @s[41..76], $s[89], @s[78..80]);
    } elsif (@s == 89) {
        return (reverse(@s[79..84]), $s[87], reverse(@s[61..77]), $s[0], reverse(@s[4..59]));
    } elsif (@s == 88) {
        return (@s[7..27], $s[87], @s[29..44], $s[55], @s[46..54], $s[2], @s[56..86], $s[28]);
    } elsif (@s == 87) {
        return (@s[6..26], $s[4], @s[28..38], $s[27], @s[40..58], $s[2], @s[60..86]);
    } elsif (@s == 86) {
        return (@s[4..30], $s[3], @s[32..84]);
    } elsif (@s == 85) {
        return (@s[3..10], $s[0], @s[12..54], $s[84], @s[56..83]);
    } elsif (@s == 84) {
        return (reverse(@s[71..78]), $s[14], reverse(@s[38..69]), $s[70], reverse(@s[15..36]), $s[80], reverse(@s[0..13]));
    } elsif (@s == 83) {
        return (reverse(@s[64..80]), $s[0], reverse(@s[1..62]), $s[63]);
    } elsif (@s == 82) {
        return (reverse(@s[38..80]), $s[7], reverse(@s[8..36]), $s[0], reverse(@s[1..6]), $s[37]);
    } elsif (@s == 81) {
        return ($s[56], reverse(@s[57..79]), $s[41], reverse(@s[42..55]), $s[80], reverse(@s[35..40]), $s[0], reverse(@s[30..33]), $s[34], reverse(@s[10..28]), $s[29], reverse(@s[1..8]), $s[9]);
    } elsif (@s == 80) {
        return (@s[1..18], $s[0], @s[20..67], $s[19], @s[69..79]);
    } elsif (@s == 79) {
        return ($s[54], reverse(@s[55..77]), $s[39], reverse(@s[40..53]), $s[78], reverse(@s[35..38]), $s[0], reverse(@s[30..33]), $s[34], reverse(@s[10..28]), $s[29], reverse(@s[1..8]), $s[9]);
    }

    return ();    # fail
}

sub _getsig {
    my $sig = shift;
    croak 'Unable to find signature' unless $sig;
    my @sig = _sigdecode(split(//, $sig));
    croak "Unable to decode signature $sig of length " . length($sig) unless @sig;
    return join('', @sig);
}

sub _parse_stream_map {
    my $param       = shift;
    my $fmt_url_map = {};
    for my $stuff (split ',', $param) {
        my $uri = URI->new;
        $uri->query($stuff);
        my $query = +{ $uri->query_form };
        my $sig = $query->{sig} || ($query->{s} ? _getsig($query->{s}) : undef);
        my $url = $query->{url};
        $fmt_url_map->{$query->{itag}} = $url . ($sig ? '&signature='.$sig : '');
    }

    return $fmt_url_map;
}

sub ua {
    my ($self, $ua) = @_;
    return $self->{ua} unless $ua;
    croak "Usage: $self->ua(\$LWP_LIKE_OBJECT)" unless eval { $ua->isa('LWP::UserAgent') };
    $self->{ua} = $ua;
}

sub _suffix {
    my $fmt = shift;
    return $fmt =~ /43|44|45|46/ ? 'webm'
         : $fmt =~ /18|22|37|38/ ? 'mp4'
         : $fmt =~ /13|17/       ? '3gp'
         :                         'flv'
    ;
}

sub video_id {
    my ($self, $stuff) = @_;
    return unless $stuff;
    if ($stuff =~ m{/.*?[?&;!](?:v|video_id)=([^&#?=/;]+)}) {
        return $1;
    }
    elsif ($stuff =~ m{/(?:e|v|embed)/([^&#?=/;]+)}) {
        return $1;
    }
    elsif ($stuff =~ m{#p/(?:u|search)/\d+/([^&?/]+)}) {
        return $1;
    }
    elsif ($stuff =~ m{youtu.be/([^&#?=/;]+)}) {
        return $1;
    }
    else {
        return $stuff;
    }
}

sub playlist_id {
    my ($self, $stuff) = @_;
    return unless $stuff;
    if ($stuff =~ m{/.*?[?&;!]list=([^&#?=/;]+)}) {
        return $1;
    }
    elsif ($stuff =~ m{^\s*([FP]L[\w\-]+)\s*$}) {
        return $1;
    }
    return $stuff;
}

sub user_id {
    my ($self, $stuff) = @_;
    return unless $stuff;
    if ($stuff =~ m{/user/([^&#?=/;]+)}) {
        return $1;
    }
    return $stuff;
}

1;
__END__

=head1 NAME

WWW::YouTube::Download - Very simple YouTube video download interface

=head1 SYNOPSIS

  use WWW::YouTube::Download;

  my $client = WWW::YouTube::Download->new;
  $client->download($video_id);

  my $video_url = $client->get_video_url($video_id);
  my $title     = $client->get_title($video_id);     # maybe encoded utf8 string.
  my $fmt       = $client->get_fmt($video_id);       # maybe highest quality.
  my $suffix    = $client->get_suffix($video_id);    # maybe highest quality file suffix

=head1 DESCRIPTION

WWW::YouTube::Download is a library to download videos from YouTube. It relies entirely on
scraping a video's webpage and does not use YT's /get_video_info URL space.

=head1 METHODS

=over

=item B<new()>

  $client = WWW::YouTube::Download->new;

Creates a WWW::YouTube::Download instance.

=item B<download($video_id [, \%args])>

  $client->download($video_id);
  $client->download($video_id, {
      fmt      => 37,
      filename => 'sample.mp4', # save file name
  });
  $client->download($video_id, {
      filename => '{title}.{suffix}', # maybe `video_title.mp4`
  });
  $client->download($video_id, {
      cb => \&callback,
  });

Download the video file.
The first parameter is passed to YouTube video url.

Allowed arguments:

=over

=item C<cb>

Set a callback subroutine, SEE L<LWP::UserAgent> ':content_cb'
for details.

=item C<filename>

Set the filename, possibly using placeholders to be filled with
information gathered about the video.

C<< filename >> supported format placeholders:

  {video_id}
  {title}
  {user}
  {fmt}
  {suffix}
  {resolution}

Output filename is set to C<{video_id}.{suffix}> by default.

=item C<file_name>

B<< DEPRECATED >> alternative for C<filename>.

=item C<fmt>

set the format to download. Defaults to the best video quality
(inferred by the available resolutions).

=back


=item B<playback_url($video_id, [, \%args])>

  $client->playback_url($video_id);
  $client->playback_url($video_id, { fmt => 37 });

Return playback URL of the video. This is direct link to the movie file.
Function supports only "fmt" option.

=item B<prepare_download($video_id)>

Gather data about the video. A hash reference is returned, with the following
keys:

=over

=item C<fmt>

the default, suggested format. It is inferred by selecting the
alternative with the highest resolution.

=item C<fmt_list>

the list of available formats, as an array reference.

=item C<suffix>

the filename extension associated to the default format (see C<fmt>
above).

=item C<title>

the title of the video

=item C<user>

the YouTube user owning the video

=item C<video_id>

the video identifier

=item C<video_url>

the URL of the video associated to the default format (see C<fmt>
above).

=item C<video_url_map>

an hash reference containing details about all available formats.

=back

The C<video_url_map> has one key/value pair for each available format,
where the key is the format identifier (can be used as C<fmt> parameter
for L</download>, for example) and the value is a hash reference with
the following data:

=over

=item C<fmt>

the format specifier, that can be passed to L</download>

=item C<resolution>

the resolution as I<width>xI<height>

=item C<suffix>

the suffix, providing a hint about the video format (e.g. webm, flv, ...)

=item C<url>

the URL where the video can be found

=back

=item B<ua([$ua])>

  $self->ua->agent();
  $self->ua($LWP_LIKE_OBJECT);

Sets and gets LWP::UserAgent object.

=item B<video_id($url)>

Parses given URL and returns video ID.

=item B<playlist_id($url)>

Parses given URL and returns playlist ID.

=item B<user_id($url)>

Parses given URL and returns YouTube username.

=item B<get_video_url($video_id)>

=item B<get_title($video_id)>

=item B<get_user($video_id)>

=item B<get_fmt($video_id)>

=item B<get_fmt_list($video_id)>

=item B<get_suffix($video_id)>

=back

=head1 AUTHOR

xaicron E<lt>xaicron {@} cpan.orgE<gt>

=head1 CONTRIBUTORS

yusukebe

=head1 BUG REPORTING

Plese use github issues: L<< https://github.com/xaicron/p5-www-youtube-download/issues >>.

=head1 SEE ALSO

L<WWW::YouTube::Info> and L<WWW::YouTube::Info::Simple>.
L<WWW::NicoVideo::Download>
L<http://rg3.github.io/youtube-dl/>

=head1 LICENSE

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut