The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/bin/perl
#---------------------------------------------------------------------
# 20-open.t
# Copyright 2012 Christopher J. Madsen
#
# Actually open files and check the encoding
#---------------------------------------------------------------------

use strict;
use warnings;

use Test::More 0.88;

plan tests => 85;

use IO::HTML;
use File::Temp;
use Scalar::Util 'blessed';

#---------------------------------------------------------------------
sub test
{
  my ($expected, $out, $data, $name, $nextArg) = @_;

  local $Test::Builder::Level = $Test::Builder::Level + 1;

  my $options;
  if (ref $name) {
    $options = $name;
    $name    = $nextArg;
  }

  unless ($name) {
    $name = 'test ' . ($expected || 'cp1252');
  }

  my $tmp = File::Temp->new(UNLINK => 1);
  open(my $mem, '>', \(my $buf)) or die;

  if ($out) {
    $out = ":encoding($out)" unless $out =~ /^:/;
    binmode $tmp, $out;
    binmode $mem, $out;
  }

  print $mem $data;
  print $tmp $data;
  close $mem;
  $tmp->close;

  my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options);

  if ($options and $options->{encoding}) {
    ok(blessed($encoding), 'returned an object');

    $encoding = eval { $encoding->name };
  }

  is($encoding, $expected || 'cp1252', $name);

  my $firstLine = <$fh>;
  like($firstLine, qr/^<html/i);

  close $fh;

  $fh = html_file("$tmp", $options);

  is(<$fh>, $firstLine);

  close $fh;

  # Test sniff_encoding:
  undef $mem;
  open($mem, '<', \$buf) or die "Can't open in-memory file: $!";

  delete $options->{encoding} if $options;

  ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);

  is($encoding, $expected);

  seek $mem, 0, 0;

  $options->{encoding} = 1;

  ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);

  if (defined $expected) {
    ok(blessed($encoding), 'encoding is an object');

    is(eval { $encoding->name }, $expected);
  } else {
    is($encoding, undef);
  }
} # end test

#---------------------------------------------------------------------
test 'utf-8-strict' => '' => <<'';
<html><meta charset="UTF-8">

test 'utf-8-strict' => ':utf8' => <<"";
<html><head><title>Foo\xA0Bar</title>

test undef, latin1 => <<"";
<html><head><title>Foo\xA0Bar</title>

test 'UTF-16BE' => 'UTF-16BE' => <<"";
\x{FeFF}<html><head><title>Foo\xA0Bar</title>

test 'utf-8-strict' => ':utf8' => <<"";
\x{FeFF}<html><meta charset="UTF-16">

test 'utf-8-strict' => ':utf8' => <<"";
<html><meta charset="UTF-16BE">

test 'UTF-16LE' => 'UTF-16LE' => <<"";
\x{FeFF}<html><meta charset="UTF-16">

test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 };
\x{FeFF}<html><meta charset="UTF-16">

test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 };
<html><meta charset="UTF-16BE">

test 'utf-8-strict' => ':utf8' =>
  "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n",
  'UTF-8 character crosses boundary';

test 'utf-8-strict' => ':utf8' =>
  "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n",
  'UTF-8 character crosses boundary 2';

test undef, '', <<'', 'wrong pragma';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>

test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>

test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 };
<html>
<meta content="text/html; charset=ISO-8859-15">
<meta charset="UTF-16BE">

done_testing;