The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#escapeChars {return}
#use lib '../lib';
use strict;
use warnings;
use HTML::Copy;
use utf8;
use File::Spec::Functions;
#use Data::Dumper;
use Encode qw(encode_utf8 decode_utf8);

use Test::More tests => 16;

sub read_and_unlink {
    my ($path, $htmlcopy) = @_;
    open(my $in, "<".$htmlcopy->io_layer(), $path)
                                or die "Can't open $path.";
    my $contents = do {local $/; <$in>};
    close $in;
    unlink($path);
    return $contents;
}

##== prepare linked HTML file
my $linked_html = <<EOT;
<!DOCTYPE html>
<html>
</html>
EOT

my $linked_file_name = "linked$$.html";
open(my $linked_fh, ">", $linked_file_name)
                or die "Can't open $linked_file_name.";
print $linked_fh $linked_html;
close $linked_fh;

##== HTML data without charsets
my $source_html_nocharset = <<EOT;
<!DOCTYPE html>
<html>
ああ
<a href="$linked_file_name#anchor"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</html>
EOT

my $result_html_nocharset = <<EOT;
<!DOCTYPE html>
<html>
ああ
<a href="../$linked_file_name#anchor"></a>
<frame src="../$linked_file_name">
<img src="../$linked_file_name">
<script src="../$linked_file_name"></script>
<link href="../$linked_file_name">
</html>
EOT

##== write test data
my $sub_dir_name = "sub$$";
mkdir($sub_dir_name);
my $src_file_name = "file$$.html";
my $destination = catfile($sub_dir_name, $src_file_name);

##== Test code with no charsets HTML
open(my $src_fh, ">:utf8", $src_file_name) 
                            or die "Can't open $src_file_name.";
print $src_fh $source_html_nocharset;
close $src_fh;

##=== parse_to UTF-8
my $p = HTML::Copy->new($src_file_name);
my $copy_html = $p->parse_to($destination);

ok($copy_html eq $result_html_nocharset, "parse_to no charset UTF-8");

##=== copty_to UTF8
$p->copy_to($destination);
open(my $in, "<".$p->io_layer(), $destination)
                            or die "Can't open $destination.";
$copy_html = read_and_unlink($destination, $p);

ok($copy_html eq $result_html_nocharset, "copy_to no charset UTF-8");

##=== write data with shift_jis
open($src_fh, ">:encoding(shiftjis)", $src_file_name)
                            or die "Can't open $src_file_name.";
print $src_fh $source_html_nocharset;
close $src_fh;

##=== parse_to shift_jis
$p = HTML::Copy->new($src_file_name);
$copy_html = do {
    $p->encode_suspects("shiftjis");
    $p->parse_to(catfile($sub_dir_name, $src_file_name));
};

ok($copy_html eq $result_html_nocharset, "parse_to no charset shift_jis");

##=== copy_to shift_jis
$copy_html = do {
    $p->copy_to($destination);
    open(my $in, "<".$p->io_layer, $destination)
                        or die "Can't open $destination.";
    read_and_unlink($destination, $p);
};

ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");

##== HTML with charset uft-8
my $src_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</body>
</html>
EOT

my $result_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
ああ
<a href="../$linked_file_name"></a>
<frame src="../$linked_file_name">
<img src="../$linked_file_name">
<script src="../$linked_file_name"></script>
<link href="../$linked_file_name">
</body>
</html>
EOT

##== Test code with charset utf-8
open($src_fh, ">:utf8", $src_file_name)
                or die "Can't open $src_file_name.";
print $src_fh $src_html_utf8;
close $src_fh;

##=== parse_to
$p = HTML::Copy->new($src_file_name);

$copy_html = $p->parse_to($destination);

ok($copy_html eq $result_html_utf8, "parse_to charset UTF-8");

##=== copy_to
$p->copy_to($destination);
$copy_html = read_and_unlink($destination, $p);

ok($copy_html eq $result_html_utf8, "copy_to charset UTF-8");

##=== copy_to gving a file handle
$copy_html = do {
    open $in, "<", \$src_html_utf8;
    my $outdata ='';;
    my $p = HTML::Copy->new($in);
    open my $out, ">", $destination;
    $p->destination_path($destination);
    $p->copy_to($out);
    close $out;
    read_and_unlink($destination, $p);
};

ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving a file handle");

##=== copy_to gving file handles for input and output
$copy_html = do {
    open my $in, "<", \$src_html_utf8;
    my $outdata;
    my $p = HTML::Copy->new($in);
    open my $out, ">".$p->io_layer(), \$outdata;
    $p->destination_path($destination);
    $p->copy_to($out);
    Encode::decode($p->encoding, $outdata);
};

ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");

##=== parse_to giving a file handle
$copy_html = do {
    open my $in, "<", \$src_html_utf8;
    my $p = HTML::Copy->new($in);
    $p->parse_to($destination);
};

ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");

##=== copy_to with directory destination
$copy_html = do {
    my $p = HTML::Copy->new($src_file_name);
    my $destination = $p->copy_to($sub_dir_name);
    read_and_unlink($destination, $p);
};

ok($copy_html eq $result_html_utf8, "copy_to with a directory destination");

##== HTML with charset shift_jis
my $src_html_shiftjis = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=shift_jis">
</head>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</html>
EOT

my $result_html_shiftjis = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=shift_jis">
</head>
ああ
<a href="../$linked_file_name"></a>
<frame src="../$linked_file_name">
<img src="../$linked_file_name">
<script src="../$linked_file_name"></script>
<link href="../$linked_file_name">
</html>
EOT

##== Test code with charset shift_jis
open($src_fh, ">:encoding(shiftjis)", $src_file_name)
                        or die "Can't open $src_file_name.";
print $src_fh $src_html_shiftjis;
close $src_fh;

##=== parse_to
$p = HTML::Copy->new($src_file_name);
$p->encode_suspects("shiftjis");
$copy_html = $p->parse_to($destination);

ok($copy_html eq $result_html_shiftjis, "parse_to no charset shift_jis");

##=== copy_to
$p->copy_to($destination);
$copy_html = read_and_unlink($destination, $p);

ok($copy_html eq $result_html_shiftjis, "copy_to no charset shift_jis");

##== class_methods
$copy_html = HTML::Copy->parse_file($src_file_name, $destination);

ok($copy_html eq $result_html_shiftjis, "parse_file");

HTML::Copy->htmlcopy($src_file_name, $destination);

open($in, "<".$p->io_layer, $destination)
                                or die "Can't open $destination.";
{local $/; $copy_html = <$in>};
close $in;

ok($copy_html eq $result_html_shiftjis, "htmlcopy");

unlink($destination);

##== Test with base url
my $src_html_base = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<base href="http://homepage.mac.com/tkurita/scriptfactory/">
</head>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</html>
EOT

##== Test code with base url
open($src_fh, ">:utf8", $src_file_name)
                    or die "Can't open $destination.";
print $src_fh $src_html_base;
close $src_fh;

##=== parse_to
$p = HTML::Copy->new($src_file_name);
$copy_html = $p->parse_to($destination);
ok($copy_html eq $src_html_base, "parse_to HTML with base URL");

##=== copy_to
$p->copy_to($destination);
open($in, "<".$p->io_layer, $destination)
                            or die "Can't open $destination.";
{local $/; $copy_html = <$in>};
close $in;

ok($copy_html eq $src_html_base, "copy_to HTML with base URL");
unlink($destination);

unlink($linked_file_name, $src_file_name, $destination);
rmdir($sub_dir_name);