#!/usr/bin/perl -w
#######################################################################
# A simple XHTML 1.0 Strict Validation script with encoding detection
#
# By Bjoern Hoehrmann bjoern@hoehrmann.de http://bjoern.hoehrmann.de
#######################################################################
# Pin the SP_* environment settings at compile time, i.e. before the
# `use SGML::Parser::OpenSP` line further down is processed.
# NOTE(review): presumably OpenSP reads these variables when it is loaded
# or started -- confirm against the OpenSP documentation.
BEGIN {
    @ENV{qw(SP_CHARSET_FIXED SP_ENCODING SP_BCTF)} = (1, "UTF-8", "UTF-8");
}
# Constructor for the minimal handler object that collects parser messages.
#   $class  - class name to bless the new object into
#   $parser - object whose split_message() method ErrorHandler::error uses
#             to break up the messages it receives
# Returns a blessed hash holding the parser under `p` and an initially empty
# `errors` list, so $handler->{errors} can always be dereferenced, even when
# no error was ever recorded.
sub ErrorHandler::new {
    my ($class, $parser) = @_;
    return bless { p => $parser, errors => [] }, $class;
}
# Handler method for error messages: passes the message through the stored
# parser's split_message() and appends whatever that returns to the
# handler's `errors` list.
#   $self    - the ErrorHandler instance
#   $message - the message argument exactly as received
# Returns the result of push(), i.e. the new number of stored entries.
sub ErrorHandler::error
{
    my ($self, $message) = @_;
    my $splitter = $self->{p};
    push @{ $self->{errors} }, $splitter->split_message($message);
}
use strict;
use warnings;
use SGML::Parser::OpenSP qw();
use HTML::Encoding qw();
use HTML::Doctype qw();
use LWP::UserAgent qw();
use I18N::Charset qw();
use Encode qw();
# Path of the catalog file handed to the parser: <updir>/samples/test.soc,
# assembled with File::Spec so the separators fit the host platform.
# File::Spec is loaded right here because `use constant` evaluates its value
# at compile time and no other visible line loads the module explicitly.
use File::Spec qw();
use constant TEST_CATALOG =>
    File::Spec->catfile(File::Spec->updir, 'samples', 'test.soc');
# Warning switches passed to the parser's warnings() call further down.
our @SP_OPTS = (
    'non-sgml-char-ref',
    'valid',
    'no-duplicate',
    'xml',
);
# Set up the collaborators: HTTP client, SGML parser, and the handler object
# that collects the parser's error messages.
my $u = LWP::UserAgent->new;
my $p = SGML::Parser::OpenSP->new;
my $e = ErrorHandler->new($p);

# Fetch the document. Stop with the HTTP status line on failure instead of
# validating an error page or an empty body.
my $r = $u->get("http://www.w3.org/");
die "Could not retrieve document: " . $r->status_line . "\n"
    unless $r->is_success;

# Determine the character encoding and map it to a name usable with Encode.
# Either lookup may come back undefined, and Encode::decode() fails on an
# undefined encoding name, so both cases are handled explicitly:
#   - nothing detected      -> warn and assume UTF-8
#   - detected but unmapped -> die with the offending name
my $name1 = HTML::Encoding::encoding_from_http_message($r);
warn "No character encoding detected; assuming UTF-8\n"
    unless defined $name1;
my $name2 = defined $name1
    ? I18N::Charset::enco_charset_name($name1)
    : "UTF-8";
die "Character encoding '$name1' is not supported by Encode\n"
    unless defined $name2;
my $text = Encode::decode($name2 => $r->content);
# Validation: configure the parser, then parse the decoded document.
# The three configuration calls are issued before parse_string() so that
# they are in place when parsing starts.
$p->handler($e);             # presumably the parser reports to $e (see ErrorHandler::error) -- confirm
$p->catalogs(TEST_CATALOG);  # catalog file path defined by the TEST_CATALOG constant
$p->warnings(@SP_OPTS);      # warning switches listed in @SP_OPTS
$p->parse_string($text);     # $text is the result of Encode::decode above
# Report: print one line per collected message in the form
# "[line column severity]: text", or a single notice when nothing was
# collected.
# The list is copied once with a fallback to an empty list: the handler's
# `errors` slot may be undefined when no error was recorded, and
# dereferencing it directly only works while something autovivifies it
# first.
my @errors = @{ $e->{errors} || [] };
foreach my $error (@errors)
{
    my $prim = $error->{primary_message};
    printf "[%4d %4d %s]: %s\n",
        $prim->{LineNumber},
        $prim->{ColumnNumber},
        $prim->{Severity},
        $prim->{Text};
}
if (not @errors) {
    # Plain print: there is nothing to format here.
    print "No errors found!\n";
}