The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
#######################################################################
# A simple XHTML 1.0 Strict Validation script with encoding detection
#
# By Bjoern Hoehrmann bjoern@hoehrmann.de http://bjoern.hoehrmann.de
#######################################################################

BEGIN {
  $ENV{SP_CHARSET_FIXED} = 1;      
  $ENV{SP_ENCODING}      = "UTF-8";
  $ENV{SP_BCTF}          = "UTF-8";
}

sub ErrorHandler::new {bless {p=>$_[1]}, shift}
sub ErrorHandler::error
{
  push @{$_[0]->{errors}}, $_[0]->{p}->split_message($_[1])
}

use strict;
use warnings;
use SGML::Parser::OpenSP qw();
use HTML::Encoding qw();
use HTML::Doctype qw();
use LWP::UserAgent qw();
use I18N::Charset qw();
use Encode qw();

use constant TEST_CATALOG =>
  File::Spec->catfile(File::Spec->updir, 'samples', 'test.soc');

our @SP_OPTS = qw/
  non-sgml-char-ref
  valid
  no-duplicate
  xml
/;

my $u = LWP::UserAgent->new;
my $p = SGML::Parser::OpenSP->new;
my $e = ErrorHandler->new($p);

my $r = $u->get("http://www.w3.org/");

my $name1 = HTML::Encoding::encoding_from_http_message($r);
my $name2 = I18N::Charset::enco_charset_name($name1);
my $text = Encode::decode($name2 => $r->content);

# Validation
$p->handler($e);
$p->catalogs(TEST_CATALOG);
$p->warnings(@SP_OPTS);
$p->parse_string($text);

foreach my $error (@{$e->{errors}})
{
    my $prim = $error->{primary_message};
    printf "[%4d %4d %s]: %s\n",
      $prim->{LineNumber},
      $prim->{ColumnNumber},
      $prim->{Severity},
      $prim->{Text}
}

if (not @{$e->{errors}}) {
  printf "No errors found!\n";
}