# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use strict;
use warnings;
use utf8;
use Lingua::JA::NormalizeText qw/unify_whitespaces/;
use Test::More;

# Put the :utf8 layer on every Test::More output handle so diagnostics that
# contain non-ASCII characters are written out as UTF-8.
binmode Test::More->builder->$_ => ':utf8'
    for qw/output failure_output todo_output/;


my $normalizer = Lingua::JA::NormalizeText->new(qw/unify_whitespaces/);

# Characters this test expects to come back unchanged:
# U+0009 (TAB), U+000A (LF), U+000D (CR) and U+3000 (IDEOGRAPHIC SPACE).
# A hash gives a constant-time membership check inside the loop.
my %is_preserved = map { $_ => 1 } ("\x{0009}", "\x{000A}", "\x{000D}", "\x{3000}");

# Every character exercised below, one code point per test iteration.
my $text = "\x{0009}\x{000A}\x{000B}\x{000C}\x{000D}\x{0020}\x{0085}\x{00A0}\x{1680}\x{180E}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";

for my $char (split(//, $text))
{
    # All inputs are invisible whitespace, so label each assertion with the
    # code point; otherwise a failure cannot be traced back to its input.
    my $label = sprintf('U+%04X', ord $char);

    # Preserved characters must round-trip; everything else must become
    # U+0020 (SPACE).
    my $expected = $is_preserved{$char} ? $char : "\x{0020}";
    my $outcome  = $is_preserved{$char} ? 'is left unchanged' : 'becomes U+0020';

    # Exercise the exported function on one and two characters, and the
    # object interface on three, so repeated characters are covered too.
    is(unify_whitespaces($char), $expected, "$label $outcome (function, x1)");
    is(unify_whitespaces($char x 2), $expected x 2, "$label $outcome (function, x2)");
    is($normalizer->normalize($char x 3), $expected x 3, "$label $outcome (normalizer object, x3)");
}

done_testing;