The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

# Copyright 2008, 2009, 2010, 2013, 2015 Kevin Ryde

# This file is part of HTML-FormatExternal.
#
# HTML-FormatExternal is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 3, or (at your option) any
# later version.
#
# HTML-FormatExternal is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with HTML-FormatExternal.  If not, see <http://www.gnu.org/licenses/>.

use 5.006;
use strict;
use warnings;
use FindBin;
use HTML::FormatExternal;
use Test::More tests => 284;

use lib 't';
use MyTestHelpers;
BEGIN { MyTestHelpers::nowarnings() }

if (defined $ENV{PATH}) {
  ($ENV{PATH}) = ($ENV{PATH} =~ /(.*)/);  # untaint so programs can run
}

{
  my $want_version = 26;
  is ($HTML::FormatExternal::VERSION, $want_version,
      'VERSION variable');
  is (HTML::FormatExternal->VERSION,  $want_version,
      'VERSION class method');
  ok (eval { HTML::FormatExternal->VERSION($want_version); 1 },
      "VERSION class check $want_version");
  my $check_version = $want_version + 1000;
  ok (! eval { HTML::FormatExternal->VERSION($check_version); 1 },
      "VERSION class check $check_version");
}


# Cribs:
#
# Test::More::like() ends up spinning a qr// through a further /$re/ which
# loses any /m modifier, prior to perl 5.10.0 at least.  So /m is avoided in
# favour of some (^|\n) and ($|{\r\n]) forms.


sub is_undef_or_string {
  my ($obj) = @_;
  if (! defined $obj) { return 1; }
  if (ref $obj) { return 0; }
  if ($obj eq '') { return 0; } # disallow empty
  return 1;
}

sub is_undef_or_one_line_string {
  my ($obj) = @_;
  if (! defined $obj) { return 1; }
  if (ref $obj) { return 0; }
  if ($obj eq '') { return 0; } # disallow empty
  if ($obj =~ /\n/) { return 0; }
  return 1;
}

my $colon_is_ordinary;

foreach my $class ('HTML::FormatText::Elinks',
                   'HTML::FormatText::Html2text',
                   'HTML::FormatText::Links',
                   'HTML::FormatText::Lynx',
                   'HTML::FormatText::Netrik',
                   'HTML::FormatText::W3m',
                   'HTML::FormatText::Zen',
                  ) {
  diag $class;
  use_ok ($class);

  is ($class->VERSION,
      $HTML::FormatExternal::VERSION,
      "$class VERSION method");
  is (do { no strict 'refs'; ${"${class}::VERSION"} },
      $HTML::FormatExternal::VERSION,
      "$class VERSION variable");

  #
  # program_full_version()
  #
  { my $version = $class->program_full_version;
    require Data::Dumper;
    diag ("$class program_full_version ", Data::Dumper::Dumper($version));
    ok (is_undef_or_string($version),
        'program_full_version() from class');
  }
  { my $formatter = $class->new;
    my $version = $formatter->program_full_version;
    ok (is_undef_or_string($version),
        'program_full_version() from obj');
  }

  #
  # program_version()
  #
  { my $version = $class->program_version();
    require Data::Dumper;
    diag ("$class program_version ", Data::Dumper::Dumper($version));
    ok (is_undef_or_one_line_string($version),
        "$class program_version() from class");
  }
  { my $formatter = $class->new;
    my $version = $formatter->program_version();
    ok (is_undef_or_one_line_string($version),
        "$class program_version() from obj");
  }

  foreach my $method ('_have_nomargins',
                      '_have_html_margin',
                      '_have_ascii') {
    if ($class->can($method)) {
      diag "$class $method() ",($class->$method ? "yes" : "no");
    }
  }


 SKIP: {
    if (! defined $class->program_full_version) {
      skip "$class program not available", 33;
    }

    { my $str = $class->format_string ('<html><body>Hello</body><html>');
      like ($str, qr/Hello/,
            "$class through class");
    }
    { my $formatter = $class->new;
      my $str = $formatter->format ('<html><body>Hello</body><html>');
      like ($str, qr/Hello/,
            "$class through formatter object");
    }

  SKIP: {
      eval { require HTML::TreeBuilder }
        or skip 'HTML::TreeBuilder not available', 1;

      my $tree = HTML::TreeBuilder->new_from_content
        ('<html><body>Hello</body><html>');
      my $formatter = $class->new;
      my $str = $formatter->format ($tree);
      like ($str, qr/Hello/,
            "$class through formatter object on TreeBuilder");
    }

  SKIP: {
      if ($class =~ /Lynx/ && ! $class->_have_nomargins()) {
        skip "this Lynx doesn't have -nomargins", 1;
      }
      if ($class =~ /Links/ && ! $class->_have_html_margin()) {
        skip "this links doesn't have -html-margin", 1;
      }

      my $str = $class->format_string ('<html><body>Hello</body><html>',
                                       leftmargin => 0);
      like ($str, qr/(^|\n)Hello/,  # allowing for leading blank lines
            "$class through class, with leftmargin 0");
    }

  SKIP: {
      if ($class =~ /Zen/) {
        skip "$class doesn't support rightmargin", 1;
      }
      if ($class =~ /Lynx/ && ! $class->_have_nomargins()) {
        skip "this Lynx doesn't have -nomargins", 1;
      }
      if ($class =~ /Links/ && ! $class->_have_html_margin()) {
        skip "this links doesn't have -html-margin", 1;
      }

      my $html = '<html><body>123 567 9012 abc def ghij</body><html>';
      my $str = $class->format_string ($html,
                                       leftmargin => 0,
                                       rightmargin => 12);
      {
        require Data::Dumper;
        my $dumper = Data::Dumper->new([$str],['output']);
        $dumper->Useqq (1);
        diag ($dumper->Dump);
      }
      like ($str, qr/(^|\n)123 567 9012($|[\r\n])/,
            "$class through class, with leftmargin 0 rightmargin 12");
    }

    foreach my $data
      ([ 'ascii',
         '',
         '<html><body><a href="page.html">Foo</a></body></html>',
         'http://foo.org/page.html' ],

       [ 'utf16le',
         "\377\376",
         "<\0h\0t\0m\0l\0>\0<\0b\0o\0d\0y\0>\0<\0a\0 \0h\0r\0e\0f\0=\0\"\0p\0a\0g\0e\0.\0h\0t\0m\0l\0\"\0>\0F\0o\0o\0<\0/\0a\0>\0<\0/\0b\0o\0d\0y\0>\0<\0/\0h\0t\0m\0l\0>\0",
         'http://foo.org/page.html' ],

       [ 'utf16be',
         "\376\377",
         "\0<\0h\0t\0m\0l\0>\0<\0b\0o\0d\0y\0>\0<\0a\0 \0h\0r\0e\0f\0=\0\"\0p\0a\0g\0e\0.\0h\0t\0m\0l\0\"\0>\0F\0o\0o\0<\0/\0a\0>\0<\0/\0b\0o\0d\0y\0>\0<\0/\0h\0t\0m\0l\0>",
         'http://foo.org/page.html' ],

       ['utf32le',
        "\377\376\0\0",
        "<\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0>\0\0\0<\0\0\0b\0\0\0o\0\0\0d\0\0\0y\0\0\0>\0\0\0<\0\0\0a\0\0\0 \0\0\0h\0\0\0r\0\0\0e\0\0\0f\0\0\0=\0\0\0\"\0\0\0p\0\0\0a\0\0\0g\0\0\0e\0\0\0.\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0\"\0\0\0>\0\0\0F\0\0\0o\0\0\0o\0\0\0<\0\0\0/\0\0\0a\0\0\0>\0\0\0<\0\0\0/\0\0\0b\0\0\0o\0\0\0d\0\0\0y\0\0\0>\0\0\0<\0\0\0/\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0>\0\0\0",
        'http://foo.org/page.html' ],

       [ 'utf32be',
         "\0\0\376\377",
         "\0\0\0<\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0>\0\0\0<\0\0\0b\0\0\0o\0\0\0d\0\0\0y\0\0\0>\0\0\0<\0\0\0a\0\0\0 \0\0\0h\0\0\0r\0\0\0e\0\0\0f\0\0\0=\0\0\0\"\0\0\0p\0\0\0a\0\0\0g\0\0\0e\0\0\0.\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0\"\0\0\0>\0\0\0F\0\0\0o\0\0\0o\0\0\0<\0\0\0/\0\0\0a\0\0\0>\0\0\0<\0\0\0/\0\0\0b\0\0\0o\0\0\0d\0\0\0y\0\0\0>\0\0\0<\0\0\0/\0\0\0h\0\0\0t\0\0\0m\0\0\0l\0\0\0>",
         'http://foo.org/page.html' ],
      ) {
      my ($charset, $charset_bom, $html, $want_str) = @$data;
      my $want_re = qr/\Q$want_str/;

      foreach my $bom ('',
                       ($charset_bom ? ($charset_bom) : ())) {
      SKIP: {
          # html2text -- doesn't show link targets
          # links     -- doesn't show link targets
          # w3m       -- doesn't show link targets
          # zen       -- shows only the plain href part, doesn't expand
          if ($class !~ /Elinks|Lynx/) {
            skip "$class doesn't display absolutized links", 2;
          }
          if ($charset ne 'ascii' && $class =~ /Elinks/) {
            skip "$class only takes 8-bit input (as of 0.12pre5)", 2;
          }

          $html = "$bom$html";
          my @input_charset = ($bom ? (input_charset => $charset) : ());
          my $desc = "$class base, $charset, ".($bom?'bom':'input_charset');
          {
            my $str = $class->format_string ($html,
                                             base => 'http://foo.org',
                                             output_charset => 'us-ascii',
                                             @input_charset);
            # require Data::Dumper;
            # diag "$charset ", Data::Dumper->new([\$str],['str'])->Dump;

            like ($str, $want_re, "format_string() $desc");
          }
          {
            require File::Temp;
            my $fh = File::Temp->new (SUFFIX => '.html');
            $fh->autoflush(1);
            binmode($fh) or die 'Cannot set binmode on temp file for test';
            print $fh $html or die 'Cannot write temp file for test';
            my $filename = $fh->filename;
            my $str = $class->format_file ($filename,
                                           base => 'http://foo.org',
                                           output_charset => 'us-ascii',
                                           @input_charset);
            like ($str, $want_re, "format_string() $desc");
          }
        }
      }
    }

    # Exercise some strange filenames which might provoke the formatter
    # programs.
    #
    {
      require File::Spec;
      my $testfilename = File::Spec->catfile($FindBin::Bin,'test.html');

      if (! defined $colon_is_ordinary) {
        my $is_absolute = File::Spec->splitpath('C:/FOO');
        my ($volume,$directories,$file) = File::Spec->splitpath('C:FOO');
        $colon_is_ordinary = ($volume eq ''
                              && $directories eq ''
                              && ! $is_absolute
                              ? 1 : 0);
      };
      diag "Colon character is ordinary in filenames: ", $colon_is_ordinary;

      require File::Temp;
      require File::Copy;
      my $tempdir_object = File::Temp->newdir;
      my $tempdir_name = $tempdir_object->dirname;
      diag "Temporary directory ",$tempdir_name;

      foreach my $filename ('http:',
                            '-',
                            '-###',
                            '%57',
                            'a/b',    # filename with "/" probably uncreatable
                           ) {
        my $fullname = File::Spec->catfile($tempdir_name,$filename);

      SKIP: {
          # Don't attempt colon on Mac, MS-DOS, OS/2 etc where it's a volume
          # or directory separator.
          #
          # Cygwin translates ":" and other characters special to windows to
          # some unicode private chars, allowing them to be used
          # https://cygwin.com/cygwin-ug-net/using-specialnames.html
          # But that depends on the external program being a cygwin build
          # too, which is likely but not certain.
          #
          if ($filename =~ /:/ && ! $colon_is_ordinary) {
            skip "Cannot copy test file to $fullname: $!", 2;
          }
          # might be impossible to create a file with a slash like "a/b"
          File::Copy::copy($testfilename, $fullname)
              or skip "Cannot copy test file to $fullname: $!", 2;

          {
            my $str = $class->format_file ($fullname);
            like ($str, qr/body.*text/,
                  "$class format_file() filename \"$fullname\"");
          }

          require Cwd;
          my $old_dir = Cwd::getcwd();
          ($old_dir) = ($old_dir =~ /(.*)/);  # untaint
          chdir $tempdir_name or die "Oops, cannot chdir to $tempdir_name";

          {
            my $str = $class->format_file ($filename);
            like ($str, qr/body.*text/,
                  "$class format_file() filename \"$filename\"");
          }

          chdir $old_dir or die "Oops, cannot chdir back to $old_dir";
        }

        # actually File::Temp removes files in its temporary directory anyway
        unlink $fullname;
      }
    }
  }
}

exit 0;