The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More tests => 9;
use Search::Tools::XML;
use Search::Tools::UTF8;

my $txt = "FOR IMMEDIATE RELEASE

Music Festival

June 2011
";

my $XML = Search::Tools::XML->new;

like( ' ',      qr/\s/, "ascii 32 == \\s" );
like( "\n",     qr/\s/, "ascii 10 == \\s" );
like( "\r",     qr/\s/, "ascii 13 == \\s" );
like( "\t",     qr/\s/, "ascii tab == \\s" );
like( "\x200b", qr/\s/, "\\x200b == \\s" );
like( "\x2028", qr/\s/, "\\x2028 == \\s" );

ok( is_valid_utf8($txt), "is_valid_utf8 pre strip_html" );
my $clean = $XML->strip_html( $txt, 1 );
ok( is_valid_utf8($clean), "is_valid_utf8 post strip_html" );
is( $clean,
    "FOR IMMEDIATE RELEASE Music Festival June 2011 ",
    "got clean text"
);

#debug_bytes($clean);
#diag($clean);