#!/usr/bin/perl -w
# Copyright 2008, 2009, 2010, 2011, 2012, 2013, 2014 Kevin Ryde
#
# This file is part of RSS2Leafnode.
#
# RSS2Leafnode is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# RSS2Leafnode is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with RSS2Leafnode. If not, see <http://www.gnu.org/licenses/>.
use strict;
use warnings;
use App::RSS2Leafnode;
use URI;
use URI::file;
use Getopt::Long;
# uncomment this to run the ### lines
#use Smart::Comments;
use FindBin;
my $progname = $FindBin::Script;
use Encode; # for Encode::PERLQQ
use PerlIO::encoding; # for fallback
# version 0.06 for bug fix of a struct size for perl 5.10 (there's some
# fragile duplication)
use PerlIO::locale 0.06;
our $VERSION = 33;
# locale encoding conversion on the tty, wide-chars everywhere internally
# for instance $subject from an item might be wide chars printed when --verbose
{ no warnings 'once';
local $PerlIO::encoding::fallback = Encode::PERLQQ; # \x{1234} style
(binmode (STDOUT, ':locale') && binmode (STDERR, ':locale'))
or die "Cannot set :encoding on stdout/stderr: $!\n";
}
my $r2l = App::RSS2Leafnode->new
(
user_agent => 'blah/1.2',
# rss_charset_override => 'windows-1252',
# rss_charset_override => 'iso-8859-1',
verbose => 2,
msgidextra => 'b',
render => 1,
# render => 'vilistextum',
# render => 'lynx',
render_width => 50,
rss_newest_only => 1, # '1 day',
# rss_get_links => 1,
# rss_get_comments => 1,
# get_icon => 1,
# html_extract_main => 1,
html_extract_main => 'attach_full',
);
my @uris;
my $method = 'fetch_rss';
my $option_post = 0;
GetOptions (require_order => 1,
'verbose:1' => \$r2l->{'verbose'},
'msgid=s' => \$r2l->{'msgidextra'},
'newest' => \$r2l->{'rss_newest_only'},
'html' => sub { $method = 'fetch_html' },
'post' => \$option_post,
'all' => sub { $r2l->{'rss_newest_only'} = 0 },
'<>' => sub {
my ($arg) = @_;
$arg = "$arg";
push @uris,
($arg =~ /^[a-z]+:/ ? URI->new($arg) : URI::file->new($arg));
},
) or exit 1;
if (! $option_post) {
no warnings 'redefine';
*App::RSS2Leafnode::nntp_message_id_exists = sub { 0 };
*App::RSS2Leafnode::nntp_post = sub {
my ($self, $mime) = @_;
print "\n[$progname: message]\n",
$mime->as_string,
"\n[$progname: end, mime_type ",$mime->mime_type,"]\n";
if ($mime->mime_type eq 'text/html') {
my $head = $mime->head;
my $body = $mime->bodyhandle;
my $charset = $head->mime_attr('content-type.charset');
my $html = $body->as_string;
my $utf8 = utf8::is_utf8($html);
### $utf8
if (! $utf8 && $charset) {
$html = Encode::decode($charset, $html)
}
# require File::Temp;
# my $tempfh = File::Temp->new;
# $body->print($tempfh);
# close $tempfh;
# ### bodyhandle: ref($body)
# my $utf8 = utf8::is_utf8($html);
# ### $utf8
# # print $html;
require HTML::Lint;
my $lint = HTML::Lint->new;
$lint->newfile ('message');
$lint->parse ($html);
# $lint->parse_file ($tempfh->filename);
my @errors = $lint->errors;
@errors = grep {$_->errcode ne 'text-use-entity'} @errors;
print "HTML::Lint errors ",scalar(@errors),"\n";
foreach my $error (@errors) {
print $error->as_string, "\n";
}
}
return 1;
};
}
if (1) {
$r2l->ua->add_handler(response_done => \&lwp_response_done__add_content_md5);
sub lwp_response_done__add_content_md5 {
my ($resp, $ua, $h) = @_;
if ($resp->is_success && ! defined($resp->header('Content-MD5'))) {
my $uri = $resp->request->uri;
# require Data::Dumper;
# print "$progname: ", Data::Dumper->new([\$content],['content'])->Useqq(1)->Dump;
# print "$progname: ", $resp->headers->as_string;
if (defined (my $content = $resp->decoded_content (charset => 'none'))) {
print "$progname: add Content-MD5 to $uri\n";
require Digest::MD5;
my $md5 = Digest::MD5::md5_hex($content);
$resp->headers->header ('Content-MD5' => $md5);
} else {
print "$progname: oops, cannot decoded_content() to add Content-MD5\n";
}
}
print "$progname: check md5\n";
App::RSS2Leafnode::lwp_response_done__check_md5 ($r2l, $resp);
}
# if ($uri->scheme eq 'file' && $uri->host ~~ ['','localhost']) {
# my $filename = $uri->file;
# require Digest::file;
# $md5 = Digest::file::digest_file_hex($filename, "MD5");
# } else {
# }
}
if (! @uris) {
@uris = map {URI::file->new($_)} glob('samp/*');
$r2l->{'rss_newest_only'} = 1;
}
foreach my $uri (@uris) {
if ($uri->isa('URI::file')) {
$uri = URI->new_abs ($uri, URI::file->cwd);
}
}
foreach my $uri (@uris) {
print "-------------------------------------------------------------------------------\n$progname: $method $uri\n";
# force re-read
$r2l->status_read;
delete $r2l->{'global_status'}->{$uri};
$r2l->$method ('r2l.test', $uri,
# render => 'lynx',
);
}
exit 0;