The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
Changes 09
META.yml 1424
Makefile.PL 11
bin/htmlcopy 32
lib/HTML/Copy.pm 2450
t/parse.t 56
6 files changed (This is a version diff) 4792
@@ -1,5 +1,14 @@
 Revision history for Perl extension HTML::Copy.
 
+1.31 2013-06-27
+    * Remove devendencies on Cwd package.
+    * Broken links are not unescaped.
+    * Ignore template toolikit's variables in links.
+    * Fixed spelling mistakes in the document.
+        * Thanks to gregor herrmann.
+    * Fixed failing tests with Perl 5.18.
+        * Thanks to gregor hermman.
+
 1.3 2008-02-20
     * HTML::Copy can accept file handles instead file pathes.
     * htmlcopy can use standard input and output.
@@ -1,15 +1,25 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
-name:         HTML-Copy
-version:      1.3
-version_from: lib/HTML/Copy.pm
-installdirs:  site
+--- #YAML:1.0
+name:               HTML-Copy
+version:            1.31
+abstract:           ~
+author:  []
+license:            unknown
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+build_requires:
+    ExtUtils::MakeMaker:  0
 requires:
-    Class::Accessor:               0
-    HTML::Parser:                  3.4
-    HTTP::Headers:                 0
-    Test::More:                    0
-    URI:                           0
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+    Class::Accessor:  0
+    HTML::Parser:     3.4
+    HTTP::Headers:    0
+    Test::More:       0
+    URI:              0
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.56
+meta-spec:
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4
@@ -1,4 +1,4 @@
-#!perl -w
+#!/usr/bin/env perl -w
 use 5.008;
 use strict;
 use ExtUtils::MakeMaker;
@@ -6,9 +6,8 @@ use File::Spec;
 use HTML::Copy;
 use Getopt::Long;
 use Pod::Usage;
-use Cwd;
 
-our $VERSION = '1.3';
+our $VERSION = '1.31';
 
 {
     my $man = 0;
@@ -21,7 +20,7 @@ our $VERSION = '1.3';
     if (@ARGV > 2) {
         pod2usage(-message => 'Too many arguments.', 
                     -exitstatus => 1, -verbose => 1)
-  }
+    }
 
     if (@ARGV < 1) {
         pod2usage(-message => 'Required arguments is not given.', 
@@ -6,26 +6,24 @@ use warnings;
 use File::Spec;
 use File::Basename;
 use File::Path;
-#use Cwd;
 use utf8;
 use Encode;
 use Encode::Guess;
 use Carp;
-#use Data::Dumper;
 
 use HTML::Parser 3.40;
 use HTML::HeadParser;
 use URI::file;
 
-use base qw(HTML::Parser Class::Accessor);
+use base qw(HTML::Parser Class::Accessor::Fast);
 
 __PACKAGE__->mk_accessors(qw(link_attributes
                             has_base));
 
 #use Data::Dumper;
 
-our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc');
-# 'livesrc' and 'csref' are uesed in Adobe GoLive
+our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc', 'user');
+# 'livesrc', 'user' and 'csref' are uesed in Adobe GoLive
 
 =head1 NAME
 
@@ -33,11 +31,11 @@ HTML::Copy - copy a HTML file without breaking links.
 
 =head1 VERSION
 
-Version 1.3
+Version 1.31
 
 =cut
 
-our $VERSION = '1.3';
+our $VERSION = '1.31';
 
 =head1 SYMPOSIS
 
@@ -112,7 +110,7 @@ sub parse_file($$$) {
 
     $p = HTML::Copy->new($source);
 
-Make an instance of this module with specifing a source of HTML.
+Make an instance of this module with specifying a source of HTML.
 
 The argument $source can be a file path or a file handle. When a file handle is passed, you may need to indicate a file path of the passed file handle by the method L<"source_path">. If calling L<"source_path"> is omitted, it is assumed that the location of the file handle is the current working directory.
 
@@ -127,7 +125,10 @@ sub new {
         @$self{@keys} = @args{@keys};
     } else {
         my $file = shift @_;
-        if (!ref($file) && (ref(\$file) ne "GLOB")) {
+        my $ref = ref($file);
+        if ($ref =~ /^Path::Class::File/) {
+            $self->source_path($file);
+        } elsif (! $ref && (ref(\$file) ne 'GLOB')) {
             $self->source_path($file);
         } else {
             $self->source_handle($file);
@@ -136,7 +137,7 @@ sub new {
     
     $self->link_attributes(\@default_link_attributes);
     $self->has_base(0);
-    
+    $self->attr_encoded(1);
     return $self;
 }
 
@@ -250,7 +251,6 @@ sub encoding {
     if ($self->{'encoding'}) {
         return $self->{'encoding'};
     }
-    
     my $in = $self->source_handle;
     my $data = do {local $/; <$in>;};
     my $p = HTML::HeadParser->new;
@@ -289,7 +289,7 @@ sub encoding {
     $p->io_layer;
     $p->io_layer(':utf8');
 
-Get and set PerlIO layer to read the source path and to write the destination path. Usualy it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
+Get and set PerlIO layer to read the source path and to write the destination path. Usually it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
 
 =cut
 
@@ -312,7 +312,7 @@ sub io_layer {
     @suspects = $p->encode_sustects;
     $p->encode_suspects(qw/shiftjis euc-jp/);
 
-Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not requred to add suspects.
+Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not required to add suspects.
 
 =cut
 
@@ -361,10 +361,29 @@ Tetsuro KURITA <tkurita@mac.com>
 
 sub declaration { $_[0]->output("<!$_[1]>")     }
 sub process     { $_[0]->output($_[2])          }
-sub comment     { $_[0]->output("<!--$_[1]-->") }
 sub end         { $_[0]->output($_[2])          }
 sub text        { $_[0]->output($_[1])          }
 
+sub comment     {
+    my ($self, $comment) = @_;
+    if ($comment =~ /InstanceBegin template="([^"]+)"/) {
+        my $uri = URI->new($1);
+        my $newlink = $self->change_link($uri);
+        $comment = " InstanceBegin template=\"$newlink\" ";
+    }
+    
+    $self->output("<!--$comment-->");
+}
+
+sub process_link {
+    my ($self, $link_path)= @_;
+    return undef if ($link_path =~ /^\$/);
+    return undef if ($link_path =~ /^\[%.*%\]$/);
+    my $uri = URI->new($link_path);
+    return undef if ($uri->scheme);
+    return $self->change_link($uri);
+}
+
 sub start {
     my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_; 
     
@@ -376,15 +395,23 @@ sub start {
         my $is_changed = 0;
         foreach my $an_attr (@{$self->link_attributes}) {
             if (exists($attr_dict->{$an_attr})){
-                my $link_path = $attr_dict->{$an_attr};
-                next if ($link_path =~ /^\$/);
-                my $uri = URI->new($link_path);
-                next if ($uri->scheme);
+                my $newlink = $self->process_link($attr_dict->{$an_attr});
+                next unless ($newlink);
+                $attr_dict->{$an_attr} = $newlink;
                 $is_changed = 1;
-                $attr_dict->{$an_attr} = $self->change_link($uri);
             }
         }
-    
+        
+        if ($tag eq 'param') {
+            if ($attr_dict->{'name'} eq 'src') {
+                my $newlink = $self->process_link($attr_dict->{'value'});
+                if ($newlink) {
+                    $attr_dict->{'value'} = $newlink;
+                    $is_changed = 1;
+                }
+            }
+        }
+        
         if ($is_changed) {
             my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
             $tag_text = "<$tag $attrs_text>";
@@ -462,7 +489,7 @@ sub change_link {
         $result_uri = $abs_uri->rel($self->destination_uri);
     } else {
         warn("$abs_path is not found.\nThe link to this path is not changed.\n");
-        $result_uri = $uri;
+        return "";
     }
     
     return $result_uri->as_string;
@@ -478,9 +505,8 @@ sub source_handle {
     
     if (@_) {
         $self->{'source_handle'} = shift @_;
-    }
-    elsif (!$self->{'source_handle'}) {
-        my $path = $self->source_path or croak "source_paht is undefined.";
+    } elsif (!$self->{'source_handle'}) {
+        my $path = $self->source_path or croak "source_path is undefined.";
         open my $in, "<", $path or croak "Can't open $path.";
         $self->{'source_handle'} = $in;
     }
@@ -6,6 +6,7 @@ use HTML::Copy;
 use utf8;
 use File::Spec::Functions;
 #use Data::Dumper;
+use Encode qw(encode_utf8 decode_utf8);
 
 use Test::More tests => 16;
 
@@ -109,7 +110,7 @@ $copy_html = do {
 ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");
 
 ##== HTML with charset uft-8
-my $src_html_utf8 = <<EOT;
+my $src_html_utf8 = encode_utf8(<<EOT);
 <!DOCTYPE html>
 <html>
 <head>
@@ -126,7 +127,7 @@ my $src_html_utf8 = <<EOT;
 </html>
 EOT
 
-my $result_html_utf8 = <<EOT;
+my $result_html_utf8 = encode_utf8(<<EOT);
 <!DOCTYPE html>
 <html>
 <head>
@@ -174,7 +175,7 @@ $copy_html = do {
     read_and_unlink($destination, $p);
 };
 
-ok($copy_html eq $result_html_utf8, "copy_to giviing a file handle");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving a file handle");
 
 ##=== copy_to gving file handles for input and output
 $copy_html = do {
@@ -187,7 +188,7 @@ $copy_html = do {
     Encode::decode($p->encoding, $outdata);
 };
 
-ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
 
 ##=== parse_to giving a file handle
 $copy_html = do {
@@ -196,7 +197,7 @@ $copy_html = do {
     $p->parse_to($destination);
 };
 
-ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
+ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
 
 ##=== copy_to with directory destination
 $copy_html = do {