The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
# $File: //member/autrijus/Lingua-ZH-Toke/t/2-utf8.t $ $Author: autrijus $
# $Revision: #2 $ $Change: 9669 $ $DateTime: 2004/01/11 13:11:05 $

use strict;
use Test;

BEGIN {
    # On perls older than 5.7, try to load Encode::compat first.  The eval
    # swallows any failure on purpose: the real check is the Encode load
    # that follows.
    if ($] < 5.007) {
        eval { require Encode::compat };
    }

    # If Encode cannot be loaded at all, declare an empty plan and stop
    # here instead of running the checks below.
    unless (eval { require Encode }) {
        plan(tests => 0);
        exit;
    }

    # 1 version check + 5 tokenization + 6 histogram + 8 iteration checks.
    plan(tests => 20);
}

# The string literals in this file are written in UTF-8.
use utf8;

# Load the module at run time; its import() is invoked by hand below,
# after the version check.
require Lingua::ZH::Toke;

# First check: the module reports a true $VERSION.  The trailing "or 1"
# makes the guard always true (presumably it only exists to mention the
# variable a second time -- confirm before simplifying).
if ($Lingua::ZH::Toke::VERSION or 1) {
    ok($Lingua::ZH::Toke::VERSION);
}

# Pass the 'utf8' option to the module's import().
Lingua::ZH::Toke->import('utf8');

# Build the top-level object from three '/'-separated fragments
# (the original note says ->Sentence also works as a constructor).
my $token = Lingua::ZH::Toke->new( '那人卻在/燈火闌珊處/益發意興闌珊' );

# Cursor that is moved one level deeper on every step below.
my $tmp = $token;

# Easy tokenization via array dereferencing: each row gives the index to
# descend into, the expected stringification of the element found there,
# and the diagnostic label for the check.
for my $step (
    [ 0, '那人卻在', 'Tokenization - Fragment'       ],
    [ 2, '卻在',     'Tokenization - Phrase'         ],
    [ 0, '卻',       'Tokenization - Character'      ],
    [ 0, 'ㄑㄩㄝˋ',  'Tokenization - Pronounciation' ],
    [ 2, 'ㄝ',       'Tokenization - Phonetic'       ],
) {
    my ($index, $expected, $label) = @$step;

    $tmp = $tmp->[$index];
    ok("$tmp", $expected, $label);
}

# Magic histogram via hash dereferencing: looking a string up on the
# top-level object yields the value checked here (undef for the one key
# that is expected to be absent).  Each row is: key, expected value,
# diagnostic label.
for my $case (
    [ '那人卻在', 1,     'Histogram - Fragment'       ],
    [ '意興闌珊', 1,     'Histogram - Phrase'         ],
    [ '發意興闌', undef, 'Histogram - No Phrase'      ],
    [ '珊',       2,     'Histogram - Character'      ],
    [ 'ㄧˋ',      2,     'Histogram - Pronounciation' ],
    [ 'ㄨ',       3,     'Histogram - Phonetic'       ],
) {
    my ($key, $expected, $label) = @$case;

    ok($token->{$key}, $expected, $label);
}

# Phrases expected from the nested iteration, in the order they must appear.
my @expected_phrases = qw(那 人 卻在 燈火 闌珊 處 益發 意興闌珊);

# Iteration: the outer loop walks the fragments of $token, the inner loop
# walks the phrases of the current fragment.  The defined() tests spell out
# what Perl adds implicitly to a bare "while (<...>)" condition.
while (defined($tmp = <$token>)) {
    while (defined($_ = <$tmp>)) {
        # Compare against, and consume, the next expected phrase.
        ok("$_", shift(@expected_phrases), 'Iteration');
    }
}

1;