bin/yt-download - metacpan.org

#!/usr/local/bin/perl
use warnings;
use strict;
use 5.010000;

our $VERSION = '0.020';

use File::Spec::Functions qw( catdir catfile curdir );
use Getopt::Long          qw( GetOptions );
use Pod::Usage            qw( pod2usage );

use Encode::Locale         qw( decode_argv );
use File::HomeDir          qw();
use File::Which            qw( which );
use IPC::System::Simple    qw( capture );
use Term::ANSIScreen       qw( :cursor :screen );
use Term::Choose           qw( choose );
use Term::ReadLine::Simple qw();
use Try::Tiny              qw( try catch );
use URI                    qw();
use URI::Escape            qw( uri_unescape );

use if $^O eq 'MSWin32', 'Win32::Console::ANSI';
print "\e(U" if $^O eq 'MSWin32';

use App::YTDL::YTConfig    qw( read_config_file options );
use App::YTDL::YTData      qw( get_data choose_from_list_and_add_to_info wrapper_get );
use App::YTDL::YTDownload  qw( download_youtube );
use App::YTDL::YTXML       qw( xml_to_entry_node entry_nodes_to_video_ids entry_node_to_info_hash );
use App::YTDL::GenericFunc qw( encode_fs );

binmode STDIN,  ':encoding(console_in)';
binmode STDOUT, ':encoding(console_out)';
binmode STDERR, ':encoding(console_out)';

my ( $arg_file, $help );
GetOptions( 'f|file=s@' => \$arg_file, 'h|?|help' => \$help )
or pod2usage( -message => $!, -verbose => 99, -sections => "SYNOPSIS" );

my $my_videos = File::HomeDir->my_videos;
my $my_data   = File::HomeDir->my_data;
my $my_home   = File::HomeDir->my_home;

my $yt_video_dir = catdir $my_videos || $my_data || curdir, 'YouTube';

my $config_dir;
if ( $my_data ) {
    $config_dir = catdir $my_data, 'yt_download';
}
else {
    $config_dir = catdir $my_home || curdir, '.yt_download';
}

mkdir $config_dir  or die $! if ! -d $config_dir;

my $opt = {
    back           => 'BACK',
    s_back         => '<<',
    next           => 'NEXT',
    quit           => 'QUIT',
    confirm        => 'CONFIRM',
    continue       => 'CONTINUE',
    useragent      => 'Mozilla/5.0',
    yt_video_dir    => $yt_video_dir,
    config_file    => catfile( $config_dir, 'yt_config.txt' ),
    log_file       => catfile( $config_dir, 'yt_download.log' ),
    log_info       => 1,
    linefold       => { Charset => 'utf-8', Newline => "\n", OutputCharset => '_UNICODE_', Urgent => 'FORCE' },
    max_len_f_name => 62,
    yt_api_v       => 2,
    invalid_char   => quotemeta( '#$&+,/:;=?@' ),
    yt_regexp      => qr/(?:youtube|youtu\.be|yourepeat|tube\.majestyc)/i,
    kb_sec_len     => 5,
    max_info_width => 120,
    right_margin   => $^O eq 'MSWin32' ? 1 : 2,
    auto_width     => 1,
    retries        => 5,
    timeout        => 15,
    overwrite      => 0,
    auto_quality   => 1,
    preferred      => [ 43 ],
};

$opt = read_config_file( $opt, $opt->{config_file} );
$opt = options( $opt ) if $help;
mkdir $opt->{yt_video_dir} or die $! if ! -d $opt->{yt_video_dir};

try {
    my $youtube_dl = which( 'youtube-dl' ) // 'youtube-dl';
    my $capture = capture( $youtube_dl, '--version' );
}
catch {
    say "Could not find 'youtube-dl' - 'youtube-dl' is required - http://rg3.github.io/youtube-dl/.";
    exit 1;
};

my @ids;
for my $file ( @$arg_file ) {
    open my $fh, '<:encoding(utf-8)', encode_fs( $file ) or die $!;
    while ( my $line = <$fh> ) {
        next if $line =~ /^\s*\z/;
        next if $line =~ /^\s*#/;
        $line =~ s/^\s+|\s+\z//g;
        push @ids, split /\s+/, $line;
    }
    close $fh or die $!;
}

local $| = 1;
print locate( 1, 1 ), cldown;

push @ids, @ARGV;
if ( ! @ids ) {
    my $trs = Term::ReadLine::Simple->new();
    my $ids = $trs->readline( 'Enter url/id: ' );
    @ids = split /\s+/, $ids;
    print up( 1 ), cldown;
}
say "No arguments" and exit if ! @ids;



my $info = _parse_arguments( $opt, @ids );
download_youtube( $opt, $info ) if defined $info && %$info;



sub _parse_arguments {
    my ( $opt, @ids ) = @_;
    my $info = {};
    my $invalid_char = $opt->{invalid_char};
    my $more = 0;
    for my $id ( @ids ) {
        if ( my $channel_id = _user_id( $opt, $id, $invalid_char ) ) {
            my $tmp = _list_id_to_info_hash( $opt, 'CL', $channel_id );
            choose_from_list_and_add_to_info( $opt, $info, $tmp, [ keys %$tmp ] );
        }
        elsif ( my $playlist_id = _playlist_id( $opt, $id, $invalid_char ) ) {
            my $tmp = _list_id_to_info_hash( $opt, 'PL', $playlist_id );
            choose_from_list_and_add_to_info( $opt, $info, $tmp, [ keys %$tmp ] );
        }
        elsif ( my $more_ids = _more_ids( $opt, $id, $invalid_char ) ) {
            my $tmp = _more_url_to_info_hash( $opt, ++$more, 'MR', $more_ids );
            choose_from_list_and_add_to_info( $opt, $info, $tmp, [ keys %$tmp ] );
        }
        elsif ( my $video_id = _video_id( $opt, $id, $invalid_char )  ) {
            $info->{$video_id}{youtube} = 1;
        }
        else {
            $info = get_data( $opt, $info, $id );
        }
    }
    return $info;
}


sub _video_id {
    my ( $opt, $id, $invalid_char ) = @_;
    if ( ! $id ) {
        return;
    }
    if ( $id =~ m{^[\p{PerlWord}-]{11}\z} ) {
        return $id;
    }
    if ( $id !~ $opt->{yt_regexp} ) {
        return;
    }
    elsif ( $id =~ m{/.*?[?&;!](?:v|video_id)=([^$invalid_char]+)} ) {
        return $1;
    }
    elsif ( $id =~ m{/(?:e|v|embed)/([^$invalid_char]+)} ) {
        return $1;
    }
    elsif ( $id =~ m{#p/(?:u|search)/\d+/([^&?/]+)} ) {
        return $1;
    }
    elsif ( $id =~ m{youtu.be/([^$invalid_char]+)} ) {
        return $1;
    }
    return;
}

sub _playlist_id {
    my ( $opt, $id, $invalid_char ) = @_;
    if ( ! $id )                                        {
        return;
    }
    if ( $id =~ m{^p#(?:[FP]L)?([^$invalid_char]+)\z} ) {
        return $1;
    }
    if ( $id !~ $opt->{yt_regexp} ) {
        return;
    }
    elsif ( $id =~ m{/.*?[?&;!]list=([^$invalid_char]+)} ) {
        return $1;
    }
    elsif ( $id =~ m{^\s*([FP]L[\w\-]+)\s*\z} ) {
        return $1;
    }
    return;
}

sub _user_id {
    my ( $opt, $id, $invalid_char ) = @_;
    if ( ! $id ) {
        return;
    }
    if ( $id =~ m{^c#([^$invalid_char]+)\z} ) {
        return $1;
    }
    if ( $id !~ $opt->{yt_regexp} ) {
        return;
    }
    elsif ( $id =~ m{/user/([^$invalid_char]+)} ) {
        return $1;
    }
    elsif ( $id =~ m{/channel/([^$invalid_char]+)} ) { # ?
        return $1;
    }
    return;
}

sub _more_ids {
    my ( $opt, $id, $invalid_char ) = @_;
    if ( ! $id ) {
        return;
    }
    elsif ( $id !~ $opt->{yt_regexp} ) {
        return;
    }
    elsif ( uri_unescape( $id ) =~ m{youtu\.?be.*video_ids=([^$invalid_char]+(?:,[^$invalid_char]+)*)} ) {
        return $1;
    }
    return;
}


sub _list_id_to_info_hash {
    my( $opt, $type, $list_id ) = @_;
    my $info = {};
    printf "Fetching %s info ... \n", $type eq 'PL' ? 'playlist' : 'channel';
    my $url = URI->new( $type eq 'PL'
        ? 'https://gdata.youtube.com/feeds/api/playlists/' . $list_id
        : 'https://gdata.youtube.com/feeds/api/users/'     . $list_id . '/uploads'
    );
    my $start_index = 1;
    my $max_results = 50;
    my $count_e_nodes = $max_results;
    while ( $count_e_nodes == $max_results ) {  # or <link rel='next'>
        $url->query_form( 'start-index' => $start_index, 'max-results' => $max_results, 'v' => $opt->{yt_api_v} );
        $start_index += $max_results;
        try {
            my $res = wrapper_get( $opt, $info, $url->as_string );
            my $xml = $res->decoded_content;
            my @e_nodes = xml_to_entry_node( $opt, $xml );
            $count_e_nodes = @e_nodes;
            $info = _entry_nodes_to_info_hash( $opt, $info, \@e_nodes, $type, $list_id );
        }
        catch {
            my $prompt = "$type : $list_id - $_";
            choose( [ 'Print ENTER' ], { prompt => $prompt } );
        };
        last if ! $count_e_nodes;
    }
    if ( ! keys %$info ) {
        my $prompt = "No videos found: $type - $url";
        choose( [ 'Print ENTER' ], { prompt => $prompt } );
    }
    else {
        $info->{$opt->{back}} = {
            title => '', author => '', keywords => '', avg_rating => '', length_seconds => 0, duration => '0:00:00', #
            view_count => 0, content => '', type => $opt->{back}, published => '0000-00-00' };
    }
    my $up = keys %$info;
    print up( $up + 2 ), cldown;
    return $info;
}


sub _entry_nodes_to_info_hash {
    my ( $opt, $info, $e_nodes, $type, $list_id ) = @_;
    if ( $type eq 'PL' ) {
        my @video_ids = entry_nodes_to_video_ids( $e_nodes );
        $info = _video_ids_to_info_hash( $opt, $info, \@video_ids, $type, $list_id );
    }
    else {
        for my $e_node ( @$e_nodes ) {
            $info = entry_node_to_info_hash( $opt, $info, $e_node, $type, $list_id );
        }
    }
    return $info;
}


sub _video_ids_to_info_hash {
    my ( $opt, $info, $video_ids, $type, $list_id ) = @_;
    for my $video_id ( @$video_ids ) {
        my $url = URI->new( 'https://gdata.youtube.com/feeds/api/videos/' . $video_id );
        $url->query_form( 'v' => $opt->{yt_api_v} );
        my $res = wrapper_get( $opt, $info, $url );
        my $xml = $res->decoded_content;
        my $e_node = xml_to_entry_node( $opt, $xml );
        $info = entry_node_to_info_hash( $opt, $info, $e_node, $type, $list_id );
    }
    return $info;
}


sub _more_url_to_info_hash {
    my ( $opt, $more, $type, $more_ids ) = @_;
    my $info = {};
    my @video_ids = split /,/, $more_ids;
    my $list_id = 'mr_' . $more;
    try {
        $info = _video_ids_to_info_hash( $opt, $info, \@video_ids,  $type, $list_id );
    }
    catch {
        my $prompt = "$type : $list_id - $_";
        choose( [ 'Print ENTER' ], { prompt => $prompt } );
    };
    if ( keys %$info ) {
        $info->{$opt->{back}} = {
            title => '', author => '', keywords => '', avg_rating => '', length_seconds => 0, duration => '0:00:00', #
            view_count => 0, content => '', type => $opt->{back}, published => '0000-00-00' };
    }
    return $info;
}


__END__

=pod

=encoding UTF-8

=head1 NAME

yt-download - Download YouTube videos.

=head1 VERSION

Version 0.020

=cut

=head1 SYNOPSIS

    yt-download -h|-?|--help

    yt-download

    yt-download url|id [url|id ...]

    yt-download -f|--file filename

When passing only the id instead of the entire url it is needed to prefix every playlist id with C<p#> and
every channel id with C<c#>.

Video ids are passed without any prefix.

The ids/urls can be entered after calling C<yt-download> - this is useful if urls contain shell metacharacters
like C<&>.

The ids/urls can also be passed with a file: C<yt-download -f|--file filename>. The urls/id in the file have to be space
separated.

=head1 DESCRIPTION

Download single YouTube videos or/and choose videos from playlists or/and channels.

Before the download the script shows some video info and lets you choose the video quality from the available qualities.

Instead of choosing the quality manually it is possible to set and use preferred qualities.

To set the different options call C<yt-download -h>.

C<App::YTDL> uses L<youtube-dl|http://rg3.github.io/youtube-dl/> to get the data required for the video download.

=head3 Legacy encodings

Non mappable characters on the output are replaced with C<*>. In file names they are replaced with C<&#xNNN;> where NNN
is the Unicode code point in a decimal number.

=head1 Options

=head2 HELP

Shows this HELP text.

=head2 PATH

Shows the version and the path of the running C<yt-download> and the path of the video directory, of the log file and of
the configuration file.

=head2 UserAgent

Sets the useragent.

If entered nothing the default useragent (Mozilla/5.0) is used.

=head2 Overwrite

If I<Overwrite> is enabled, existing files are overwritten.

If not enabled C<yt-download> appends to partially downloaded file with the same name.

=head2 Set auto quality

Sets the auto quality (fmt) mode:

=over

=item

mode 0: choose always manually

=item

mode 1: keep the first quality chosen for a playlist/channel for all videos of that playlist/channel if possible.

=item

mode 2: keep the first chosen quality for all downloads if possible.

=item

mode 3: use preferred qualities.

=item

mode 4: use always default (best) quality.

=back

=head2 Preferred qualities

Sets the preferred qualities (fmts)

=head2 Download retries

Sets the number of download retries.

=head2 Connection timeout

I<timeout> (seconds) is used as the value for the C<youtube-dl> parameter C<--socket-timeout>. I<timeout> is also used
as the value for the L<LWP::UserAgent> option C<timeout> when fetching the data required for the video download.

=head2 Enable logging

Enables info logging.

=head2 Max info width

Sets the maximum width of video info output.

=head2 Auto width

Increase the info output width automatically if the info text is long.

=head2 Max filename length

Sets the maximum length of the filename. Filenames longer as the maximum length are truncated.

=head2 Digits for "k/s"

Sets the number of digits allocated for the "kilobyte per seconds" template.

=head2 Video directory

Choose an alternative YouTube video directory.

=head1 REQUIREMENTS

=head2 Perl version

Requires Perl version 5.10.0 or greater.

=head2 youtube-dl

L<youtube-dl|http://rg3.github.io/youtube-dl/> is required.

=head2 Encoding layer

For a correct output it is required an appropriate encoding layer for STDOUT matching the terminal's character set.

=head2 Monospaced font

It is required a terminal that uses a monospaced font which supports the printed characters.

=head1 CREDITS

C<App::YTDL> uses L<youtube-dl|http://rg3.github.io/youtube-dl/> to get the data required for the video download.

Thanks to the L<Perl-Community.de|http://www.perl-community.de> and the people form
L<stackoverflow|http://stackoverflow.com> for the help.

=head1 AUTHOR

Kuerbis <cuer2s@gmail.com>

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2013-2014 Kuerbis.

This program is free software; you can redistribute it and/or modify it under the same terms as Perl 5.10.0. For
details, see the full text of the licenses in the file LICENSE.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)