@@ -1,5 +1,14 @@
Revision history for Perl extension HTML::Copy.
+1.31 2013-06-27
+ * Remove devendencies on Cwd package.
+ * Broken links are not unescaped.
+ * Ignore template toolikit's variables in links.
+ * Fixed spelling mistakes in the document.
+ * Thanks to gregor herrmann.
+ * Fixed failing tests with Perl 5.18.
+ * Thanks to gregor hermman.
+
1.3 2008-02-20
* HTML::Copy can accept file handles instead file pathes.
* htmlcopy can use standard input and output.
@@ -1,15 +1,25 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
-name: HTML-Copy
-version: 1.3
-version_from: lib/HTML/Copy.pm
-installdirs: site
+--- #YAML:1.0
+name: HTML-Copy
+version: 1.31
+abstract: ~
+author: []
+license: unknown
+distribution_type: module
+configure_requires:
+ ExtUtils::MakeMaker: 0
+build_requires:
+ ExtUtils::MakeMaker: 0
requires:
- Class::Accessor: 0
- HTML::Parser: 3.4
- HTTP::Headers: 0
- Test::More: 0
- URI: 0
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+ Class::Accessor: 0
+ HTML::Parser: 3.4
+ HTTP::Headers: 0
+ Test::More: 0
+ URI: 0
+no_index:
+ directory:
+ - t
+ - inc
+generated_by: ExtUtils::MakeMaker version 6.56
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
@@ -1,4 +1,4 @@
-#!perl -w
+#!/usr/bin/env perl -w
use 5.008;
use strict;
use ExtUtils::MakeMaker;
@@ -6,9 +6,8 @@ use File::Spec;
use HTML::Copy;
use Getopt::Long;
use Pod::Usage;
-use Cwd;
-our $VERSION = '1.3';
+our $VERSION = '1.31';
{
my $man = 0;
@@ -21,7 +20,7 @@ our $VERSION = '1.3';
if (@ARGV > 2) {
pod2usage(-message => 'Too many arguments.',
-exitstatus => 1, -verbose => 1)
- }
+ }
if (@ARGV < 1) {
pod2usage(-message => 'Required arguments is not given.',
@@ -6,26 +6,24 @@ use warnings;
use File::Spec;
use File::Basename;
use File::Path;
-#use Cwd;
use utf8;
use Encode;
use Encode::Guess;
use Carp;
-#use Data::Dumper;
use HTML::Parser 3.40;
use HTML::HeadParser;
use URI::file;
-use base qw(HTML::Parser Class::Accessor);
+use base qw(HTML::Parser Class::Accessor::Fast);
__PACKAGE__->mk_accessors(qw(link_attributes
has_base));
#use Data::Dumper;
-our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc');
-# 'livesrc' and 'csref' are uesed in Adobe GoLive
+our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc', 'user');
+# 'livesrc', 'user' and 'csref' are uesed in Adobe GoLive
=head1 NAME
@@ -33,11 +31,11 @@ HTML::Copy - copy a HTML file without breaking links.
=head1 VERSION
-Version 1.3
+Version 1.31
=cut
-our $VERSION = '1.3';
+our $VERSION = '1.31';
=head1 SYMPOSIS
@@ -112,7 +110,7 @@ sub parse_file($$$) {
$p = HTML::Copy->new($source);
-Make an instance of this module with specifing a source of HTML.
+Make an instance of this module with specifying a source of HTML.
The argument $source can be a file path or a file handle. When a file handle is passed, you may need to indicate a file path of the passed file handle by the method L<"source_path">. If calling L<"source_path"> is omitted, it is assumed that the location of the file handle is the current working directory.
@@ -127,7 +125,10 @@ sub new {
@$self{@keys} = @args{@keys};
} else {
my $file = shift @_;
- if (!ref($file) && (ref(\$file) ne "GLOB")) {
+ my $ref = ref($file);
+ if ($ref =~ /^Path::Class::File/) {
+ $self->source_path($file);
+ } elsif (! $ref && (ref(\$file) ne 'GLOB')) {
$self->source_path($file);
} else {
$self->source_handle($file);
@@ -136,7 +137,7 @@ sub new {
$self->link_attributes(\@default_link_attributes);
$self->has_base(0);
-
+ $self->attr_encoded(1);
return $self;
}
@@ -250,7 +251,6 @@ sub encoding {
if ($self->{'encoding'}) {
return $self->{'encoding'};
}
-
my $in = $self->source_handle;
my $data = do {local $/; <$in>;};
my $p = HTML::HeadParser->new;
@@ -289,7 +289,7 @@ sub encoding {
$p->io_layer;
$p->io_layer(':utf8');
-Get and set PerlIO layer to read the source path and to write the destination path. Usualy it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
+Get and set PerlIO layer to read the source path and to write the destination path. Usually it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
=cut
@@ -312,7 +312,7 @@ sub io_layer {
@suspects = $p->encode_sustects;
$p->encode_suspects(qw/shiftjis euc-jp/);
-Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not requred to add suspects.
+Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not required to add suspects.
=cut
@@ -361,10 +361,29 @@ Tetsuro KURITA <tkurita@mac.com>
sub declaration { $_[0]->output("<!$_[1]>") }
sub process { $_[0]->output($_[2]) }
-sub comment { $_[0]->output("<!--$_[1]-->") }
sub end { $_[0]->output($_[2]) }
sub text { $_[0]->output($_[1]) }
+sub comment {
+ my ($self, $comment) = @_;
+ if ($comment =~ /InstanceBegin template="([^"]+)"/) {
+ my $uri = URI->new($1);
+ my $newlink = $self->change_link($uri);
+ $comment = " InstanceBegin template=\"$newlink\" ";
+ }
+
+ $self->output("<!--$comment-->");
+}
+
+sub process_link {
+ my ($self, $link_path)= @_;
+ return undef if ($link_path =~ /^\$/);
+ return undef if ($link_path =~ /^\[%.*%\]$/);
+ my $uri = URI->new($link_path);
+ return undef if ($uri->scheme);
+ return $self->change_link($uri);
+}
+
sub start {
my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_;
@@ -376,15 +395,23 @@ sub start {
my $is_changed = 0;
foreach my $an_attr (@{$self->link_attributes}) {
if (exists($attr_dict->{$an_attr})){
- my $link_path = $attr_dict->{$an_attr};
- next if ($link_path =~ /^\$/);
- my $uri = URI->new($link_path);
- next if ($uri->scheme);
+ my $newlink = $self->process_link($attr_dict->{$an_attr});
+ next unless ($newlink);
+ $attr_dict->{$an_attr} = $newlink;
$is_changed = 1;
- $attr_dict->{$an_attr} = $self->change_link($uri);
}
}
-
+
+ if ($tag eq 'param') {
+ if ($attr_dict->{'name'} eq 'src') {
+ my $newlink = $self->process_link($attr_dict->{'value'});
+ if ($newlink) {
+ $attr_dict->{'value'} = $newlink;
+ $is_changed = 1;
+ }
+ }
+ }
+
if ($is_changed) {
my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
$tag_text = "<$tag $attrs_text>";
@@ -462,7 +489,7 @@ sub change_link {
$result_uri = $abs_uri->rel($self->destination_uri);
} else {
warn("$abs_path is not found.\nThe link to this path is not changed.\n");
- $result_uri = $uri;
+ return "";
}
return $result_uri->as_string;
@@ -478,9 +505,8 @@ sub source_handle {
if (@_) {
$self->{'source_handle'} = shift @_;
- }
- elsif (!$self->{'source_handle'}) {
- my $path = $self->source_path or croak "source_paht is undefined.";
+ } elsif (!$self->{'source_handle'}) {
+ my $path = $self->source_path or croak "source_path is undefined.";
open my $in, "<", $path or croak "Can't open $path.";
$self->{'source_handle'} = $in;
}
@@ -6,6 +6,7 @@ use HTML::Copy;
use utf8;
use File::Spec::Functions;
#use Data::Dumper;
+use Encode qw(encode_utf8 decode_utf8);
use Test::More tests => 16;
@@ -109,7 +110,7 @@ $copy_html = do {
ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");
##== HTML with charset uft-8
-my $src_html_utf8 = <<EOT;
+my $src_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
@@ -126,7 +127,7 @@ my $src_html_utf8 = <<EOT;
</html>
EOT
-my $result_html_utf8 = <<EOT;
+my $result_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
@@ -174,7 +175,7 @@ $copy_html = do {
read_and_unlink($destination, $p);
};
-ok($copy_html eq $result_html_utf8, "copy_to giviing a file handle");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving a file handle");
##=== copy_to gving file handles for input and output
$copy_html = do {
@@ -187,7 +188,7 @@ $copy_html = do {
Encode::decode($p->encoding, $outdata);
};
-ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
##=== parse_to giving a file handle
$copy_html = do {
@@ -196,7 +197,7 @@ $copy_html = do {
$p->parse_to($destination);
};
-ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
##=== copy_to with directory destination
$copy_html = do {