The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
# $File: //member/autrijus/Lingua-ZH-Toke/t/3-encoding.t $ $Author: autrijus $
# $Revision: #2 $ $Change: 9669 $ $DateTime: 2004/01/11 13:11:05 $

use strict;
use Test;

BEGIN {
    # Decide at compile time whether this file can run at all.  The check has
    # to live in BEGIN so that it is evaluated before the `use encoding` line
    # further down is compiled.
    #
    # The test is skipped (empty plan, then exit) when either
    #   * encoding.pm cannot be loaded, or
    #   * perl is 5.26 or newer: the pragma's default (non-Filter) mode is no
    #     longer supported there, so `use encoding 'big5'` would abort
    #     compilation instead of letting the test be skipped.
    my $encoding_usable = $] < 5.026 && eval { require encoding; 1 };

    unless ($encoding_usable) {
        plan tests => 0;
        exit;
    }

    # 1 version check + 5 tokenization + 6 histogram + 8 iteration checks.
    plan tests => 20;
}

use encoding 'big5';
# Load the module under test at run time.
require Lingua::ZH::Toke;

# First check: the module must define a true $VERSION.  The `or 1` makes the
# condition always true, so the check always runs; it mentions the variable a
# second time, presumably to avoid a "used only once" warning.
if ($Lingua::ZH::Toke::VERSION or 1) {
    ok($Lingua::ZH::Toke::VERSION);
}

# `require` does not call import, so do it explicitly.
# NOTE(review): the meaning of the 'utf8' argument is defined by
# Lingua::ZH::Toke itself -- confirm against that module's documentation.
Lingua::ZH::Toke->import('utf8');

# Build the top-level object from three '/'-separated pieces of text.
# (The original author notes that ->Sentence also works as a constructor.)
# The literals in this file are raw bytes in the encoding named by the
# `use encoding` line above; do not re-encode them.
my $token = Lingua::ZH::Toke->new( '¨º¤H«o¦b/¿O¤õÁñ¬À³B/¯qµo·N¿³Áñ¬À' );

# Cursor that walks down the structure one level per step.
my $tmp = $token;

# Easy tokenization via array dereferencing: each step indexes into the
# current object, then compares its stringified form with the expected text.
# Columns: index to descend into, expected string, description.
my @descent = (
    [ 0, '¨º¤H«o¦b', 'Tokenization - Fragment'       ],
    [ 2, '«o¦b',     'Tokenization - Phrase'         ],
    [ 0, '«o',       'Tokenization - Character'      ],
    [ 0, '£¢£º£®£¿', 'Tokenization - Pronounciation' ],
    [ 2, '£®',       'Tokenization - Phonetic'       ],
);

for my $step (@descent) {
    my ($index, $expected, $label) = @{$step};
    $tmp = $tmp->[$index];
    ok("$tmp", $expected, $label);
}

# Magic histogram via hash dereferencing: looking a string up in the
# top-level object is expected to yield the count listed here, or undef
# for a string that is not counted at all.
# Columns: lookup key, expected value, description.
my @histogram_checks = (
    [ "¨º¤H«o¦b", 1,     'Histogram - Fragment'       ],
    [ "·N¿³Áñ¬À", 1,     'Histogram - Phrase'         ],
    [ "µo·N¿³Áñ", undef, 'Histogram - No Phrase'      ],
    [ "¬À",       2,     'Histogram - Character'      ],
    [ "£¸£¿",     2,     'Histogram - Pronounciation' ],
    [ "£¹",       3,     'Histogram - Phonetic'       ],
);

for my $check (@histogram_checks) {
    my ($key, $expected, $label) = @{$check};
    ok($token->{$key}, $expected, $label);
}

# Phrases expected from the nested iteration below, in order.
my @phrases = qw(¨º ¤H «o¦b ¿O¤õ Áñ¬À ³B ¯qµo ·N¿³Áñ¬À);

# Iteration: the outer <> walks the fragments of the top-level object, the
# inner <> walks the phrases of the current fragment.  The `defined` tests
# and the assignment to $_ are what `while (<...>)` does implicitly; they
# are spelled out here.
while (defined($tmp = <$token>)) {
    while (defined($_ = <$tmp>)) {
        my $expected = shift @phrases;
        ok("$_", $expected, 'Iteration');
    }
}

1;