# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::Coursera;

use strict;
use warnings;

use 5.010;
use Moo;
use Mojo::DOM;
use Mojo::UserAgent;
use AnyEvent;
use AnyEvent::Util 'fork_call';
# Module-level AnyEvent condition variable, created once when the file is loaded.
my $cv = AE::cv;
use File::Path qw( make_path );
use Carp qw(croak) ;

# 1073741824 bytes = 1 GiB. Sets MOJO_MAX_MESSAGE_SIZE for this process at load
# time (presumably to lift Mojolicious' message size limit so that large lecture
# files can be fetched -- confirm against the Mojolicious documentation).
$ENV{MOJO_MAX_MESSAGE_SIZE} = 1073741824;

=head1 NAME

WWW::Coursera - Download course material (video, text, pdf ...) in parallel from Coursera.org online classes.

=head1 VERSION

version 0.08

=cut

# Distribution version; the VERSION section of the POD states the same number.
our $VERSION = '0.08';

=head2 username

  set username

=cut

# Account name; sent as the "email" form field by login(). Mandatory, read-only.
has 'username' => ( is => 'ro', required => 1 );

=head2 password

  set password

=cut

# Account password; sent as the "password" form field by login(). Mandatory, read-only.
has 'password' => ( is => 'ro', required => 1 );

=head2 course_id

  set course id

=cut

# Course identifier; used in the class.coursera.org URLs and as the name of the
# download directory. Mandatory, read-only.
has 'course_id' => ( is => 'ro', required => 1 );

=head2 debug

  debug option

=cut

# When true, the methods of this class print diagnostic messages. Off by default.
has 'debug' => ( is => 'rw', default => 0 );

=head2 max_parallel_download

  set max parallel http requests

=cut

# Number of download workers; run() copies it into $AnyEvent::Util::MAX_FORKS.
has 'max_parallel_download' => ( is => 'rw', default => 2 );

=head2 override_existing_files

  set option to override existing files

=cut

# When true, download() rewrites files that already exist on disk. Off by default.
has 'override_existing_files' => ( is => 'rw', default => 0 );


=head1 SYNOPSIS

    Scrapes video materials from the lectures area and downloads the related files in parallel.
    The default download directory is named after the course_id.
    
    The only requirement is to be enrolled in the course online.


    use WWW::Coursera;
    my $init = WWW::Coursera->new(
        username              	=> 'xxxx',	#is required
        password              	=> 'xxxx',	#is required
        course_id             	=> "xxxx",	#is required
        debug                 	=> 1,		#default disabled
        max_parallel_download 	=> 2,		#default 2
        override_existing_files	=> 1,		#default false
      );
      $init->run;

=head1 SUBROUTINES/METHODS

=head2 directory

  Create new directory 

=cut

# Make sure the download directory (named after the course id) exists.
#
# Takes no arguments besides the invocant and returns nothing.
# Dies when the directory is still missing after the creation attempt.
sub directory {
    my $self = shift;
    my $dir  = $self->{course_id};

    return if -d $dir;

    # make_path returns an empty list when the directory already exists, so
    # its return value is not a success indicator: a parallel download worker
    # may create the directory between the check above and this call.
    # Verify the end result instead of the return value.
    make_path($dir);
    die "Failed to create path: $dir" unless -d $dir;

    return;
}

=head2 extentions

  Defines the file extensions to download

=cut

# Return the file extensions that are considered downloadable.
# An array is returned, so scalar context yields the number of extensions.
sub extentions {
    my ($self) = @_;
    my @supported = qw( mp4 txt pdf pptx srt );
    return @supported;
}

=head2 UserAgent

  Create UserAgent object

=cut

# Build a Mojo::UserAgent limited to one redirect, store it in $self->{ua}
# and return it.
sub UserAgent {
    my ($self) = @_;
    return $self->{ua} = Mojo::UserAgent->new->max_redirects(1);
}


=head2 csrf

  Save csrf token for authentication

=cut

# Fetch the lecture index of the course and remember the value of the first
# response cookie as the CSRF token in $self->{csrf}.
#
# Creates a fresh user agent first (via UserAgent). Croaks when the response
# carries no usable cookie value.
sub csrf {
    my ($self) = @_;
    $self->UserAgent;

    my $index_url =
      "https://class.coursera.org/$self->{course_id}/lecture/index";
    my $tx    = $self->{ua}->get($index_url);
    my $token = $tx->res->cookies->[0]->{value};

    if ( !$token ) {
        croak "Error: No CSRF key available my be the couse is not available";
    }

    $self->{csrf} = $token;
    say "The CSRF key is : $token" if $self->debug;
}

=head2 login

  Login with username, password and csrftoken

=cut

# Log in to accounts.coursera.org with the configured username and password.
#
# Obtains the CSRF token first (via csrf) and sends it both as a cookie and as
# the X-CSRFToken header. A failed login is reported on the selected output
# handle but is not fatal. Returns nothing.
sub login {
    my $self = shift;
    $self->csrf;

    my %headers = (
        'Cookie'      => "csrftoken=$self->{csrf}",
        'X-CSRFToken' => "$self->{csrf}",
    );
    my %credentials = (
        email    => "$self->{username}",
        password => "$self->{password}",
    );
    my $tx = $self->{ua}->post(
        'https://accounts.coursera.org/api/v1/login' => \%headers,
        form => \%credentials
    );

    # The status code is undefined when no response was received at all
    # (e.g. connection refused), so it must not be used unchecked.
    my $status = $tx->res->code;
    say "The http response code from login page is :" . ( $status // '' )
      if $self->debug;

    unless ( defined $status && $status == 200 ) {
        my ( $err, $code ) = $tx->error;

        # The error may arrive as a (message, code) list or as a single hash
        # reference with "message" and "code" keys; accept both shapes.
        ( $err, $code ) = @{$err}{qw(message code)} if ref $err eq 'HASH';

        # A non-200 response without a transaction error (e.g. a redirect).
        $code //= $status;
        $err  //= 'unexpected response';

        say $code ? "$code response: $err" : "Connection error: $err";
    }

    return;
}

=head2 convert_filename

  Replace all non word chars with underscore

=cut

# Turn a lecture title into a file name.
#
# Parameters: $string - the title, $ext - the extension without a dot.
# Every non-word character becomes an underscore, runs of underscores are
# collapsed into one, and an underscore directly before the extension dot is
# dropped. Returns the resulting "<name>.<ext>" string.
sub convert_filename {
    my ( $self, $string, $ext ) = @_;
    $string =~ s/\W/_/g;

    # Collapse runs of any length; replacing only pairs would leave "__"
    # behind for three or more consecutive underscores.
    $string =~ s/_{2,}/_/g;

    $string = "$string" . ".$ext";
    $string =~ s/_\././g;
    say "Convert string $string" if $self->debug;
    return $string;
}

=head2 extract_urls

  Scrape urls from lectures

=cut

# Log in, fetch the lecture page of the course and collect the download links.
#
# Stores a hash reference in $self->{urls} that maps the converted file name
# to its URL. A link is collected once per extension (see extentions) that
# occurs in its URL. On a failed request the error is printed and
# $self->{urls} is left as an empty hash. Returns nothing.
sub extract_urls {
    my $self = shift;
    $self->login;

    # Publish the result container up front so that callers always find a
    # hash reference, even when the request below fails.
    my %urls;
    $self->{urls} = \%urls;

    my $r =
      $self->{ua}->get("https://class.coursera.org/$self->{course_id}/lecture");

    if ( $r->success ) {
        my $dom        = $r->res->dom->html->body;
        my @extentions = $self->extentions;

        $dom->find('div.course-lecture-item-resource')->each(
            sub {
                my ($resource) = @_;
                $resource->find('a[data-if-linkable=modal-lock]')->each(
                    sub {
                        my ($link) = @_;
                        my $file = $link->find('div.hidden')->text;
                        my $url  = $link->attr('href');
                        return unless defined $url;

                        foreach my $ext (@extentions) {
                            next unless "$url" =~ m/$ext/;
                            my $conv_name =
                              $self->convert_filename( $file, $ext );
                            $urls{$conv_name} = "$url";
                        }
                    }
                );
            }
        );
    }
    else {
        # Ask the transaction itself for the error: success() returned
        # nothing on this path, so there is no response object to query.
        my ( $err, $code ) = $r->error;

        # The error may arrive as a (message, code) list or as a single hash
        # reference with "message" and "code" keys; accept both shapes.
        ( $err, $code ) = @{$err}{qw(message code)} if ref $err eq 'HASH';
        $err //= 'unknown error';

        say $code ? "$code response: $err" : "Connection error: $err";
    }

    return;
}

=head2 download

  Download lectures in the course_id folder

=cut

# Download one collected file into the course directory.
#
# Parameter: $file - a key of $self->{urls}; it is also the target file name.
# An existing file is kept unless override_existing_files is true. A response
# without a 2xx status is reported with a warning and nothing is written, so
# that a later run can retry the file. Dies when the target file cannot be
# opened or written. Returns nothing.
sub download {
    my ( $self, $file ) = @_;
    say "Start download $file in $self->{course_id}";
    my $url = $self->{urls}->{$file};
    $self->directory;
    my $path = "$self->{course_id}/$file";

    return if -e $path && !$self->override_existing_files;

    my $response = $self->{ua}->get( $url, { Accept => '*/*' } )->res;

    # The status is undefined when no response was received at all.
    my $status = $response->code;
    unless ( defined $status && $status >= 200 && $status < 300 ) {
        my $reason = defined $status ? "HTTP $status" : 'no response';
        warn "Download of [$file] failed: $reason\n";
        return;
    }

    open my $fh, '>', $path or die "Could not open [$file]: $!";

    # Lecture material is mostly binary; prevent newline translation.
    binmode $fh;
    print {$fh} $response->body or die "Could not write [$file]: $!";

    # Buffered write errors only surface when the handle is closed.
    close $fh or die "Could not write [$file]: $!";

    return;
}



=head2 run

  Entry point of the package

=cut

# Entry point: collect the lecture URLs and download all files in parallel.
#
# At most max_parallel_download worker processes run at the same time. The
# call blocks until every worker has finished. A worker that dies is reported
# with a warning and does not stop the remaining downloads. Returns nothing.
sub run {
    my $self = shift;
    $AnyEvent::Util::MAX_FORKS = $self->max_parallel_download;
    $self->extract_urls;

    # A condition variable can only be signalled once, so every run gets its
    # own one instead of sharing a single instance across calls.
    my $done = AE::cv;

    # The outer begin/end pair guarantees that recv returns even when there
    # is nothing to download.
    $done->begin;

    # keys() needs a real hash: applying it to a reference is a syntax error
    # on perl 5.24 and later. Fall back to an empty hash when no URLs exist.
    foreach my $file ( keys %{ $self->{urls} || {} } ) {
        $done->begin;
        fork_call {
            $self->download($file);
        }
        sub {
            # fork_call hands an exception of the worker over in $@.
            warn "Download of $file failed: $@" if $@;
            $done->end;
        };
    }

    $done->end;
    $done->recv;

    return;
}




=head1 AUTHOR

Ovidiu N. Tatar, C<< <ovn.tatar at gmail.com> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-www-coursera at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Coursera>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.


=head1 REQUIREMENT

        perl 5.010 or higher
        Enrol in the course before starting the download
        For more info regarding required modules (see Build.PL)

=head1 INSTALLATION

To install this module, run the following commands:

	git clone https://github.com/ovntatar/WWW-Coursera.git
	cd WWW-Coursera
        
	perl Build.PL
        ./Build
        ./Build test
        ./Build install

        OR (if you don't have write permissions to create man3) use cpanminus: 

        cpanm WWW-Coursera


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc WWW::Coursera
    
    or
   
    https://github.com/ovntatar/WWW-Coursera/issues


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Coursera>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/WWW-Coursera>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/WWW-Coursera>

=item * Search CPAN

L<http://search.cpan.org/dist/WWW-Coursera/>

=back


=head1 ACKNOWLEDGEMENTS


=head1 LICENSE AND COPYRIGHT

Copyright 2013 Ovidiu N. Tatar.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.


=cut

1; # End of WWW::Coursera