#!/usr/bin/perl
# $File: //member/autrijus/Lingua-ZH-Toke/t/3-encoding.t $ $Author: autrijus $
# $Revision: #2 $ $Change: 9669 $ $DateTime: 2004/01/11 13:11:05 $
use strict;
use Test;
BEGIN {
    # Decide at compile time whether this file can run at all, and print
    # the test plan accordingly.
    #
    # The file is skipped (a plan of 0 tests, then exit) when either:
    #   * encoding.pm cannot be loaded, or
    #   * perl is 5.25.3 or newer: from the 5.26 release the pragma's
    #     default (non-Filter) mode is no longer supported and the
    #     "use encoding" statement further down would croak at compile
    #     time, after the plan had already been printed.
    my $pragma_usable = $] < 5.025003 && eval { require encoding; 1 };

    unless ($pragma_usable) {
        plan tests => 0;
        exit;
    }

    # 1 version check + 5 tokenization + 6 histogram + 8 iteration checks.
    plan tests => 20;
}
# Activate the encoding pragma with 'big5' for the rest of this file; the
# string literals below appear to be written in Big5 -- TODO confirm.
use encoding 'big5';
# Load the module under test without calling its import yet.
require Lingua::ZH::Toke;
# First test: the module must define a true $VERSION.  The trailing "or 1"
# makes the condition always true, so this ok() always runs; mentioning the
# variable a second time presumably avoids a "used only once" warning --
# confirm before simplifying.
ok($Lingua::ZH::Toke::VERSION) if $Lingua::ZH::Toke::VERSION or 1;
# Call import by hand with the 'utf8' argument.  What that argument selects
# is defined by Lingua::ZH::Toke and is not visible from this file.
Lingua::ZH::Toke->import('utf8');
# Create the Lingua::ZH::Toke::Sentence object under test
# (->Sentence also works).
my $token = Lingua::ZH::Toke->new( '¨º¤H«o¦b/¿O¤õÁñ¬À³B/¯qµo·N¿³Áñ¬À' );

# Easy tokenization via array dereferencing.  Each row is
# [ index into the previous value, expected stringification, test label ];
# the rows are applied in order, each one descending from the last.
my @descent = (
    [ 0, '¨º¤H«o¦b', 'Tokenization - Fragment' ],
    [ 2, '«o¦b', 'Tokenization - Phrase' ],
    [ 0, '«o', 'Tokenization - Character' ],
    [ 0, '£¢£º£®£¿', 'Tokenization - Pronounciation' ],
    [ 2, '£®', 'Tokenization - Phonetic' ],
);
my $node = $token;
for my $step (@descent) {
    my ($index, $expected, $label) = @$step;
    $node = $node->[$index];
    ok("$node", $expected, $label);
}

# Magic histogram via hash dereferencing.  Each row is
# [ key looked up on the top-level object, expected value, test label ].
my @histogram = (
    [ "¨º¤H«o¦b", 1, 'Histogram - Fragment' ],
    [ "·N¿³Áñ¬À", 1, 'Histogram - Phrase' ],
    [ "µo·N¿³Áñ", undef, 'Histogram - No Phrase' ],
    [ "¬À", 2, 'Histogram - Character' ],
    [ "£¸£¿", 2, 'Histogram - Pronounciation' ],
    [ "£¹", 3, 'Histogram - Phonetic' ],
);
for my $case (@histogram) {
    my ($key, $expected, $label) = @$case;
    ok($token->{$key}, $expected, $label);
}

# Iteration: the outer loop reads fragments from the top-level object, the
# inner loop reads phrases from each fragment.  Every phrase must match the
# next entry of @phrases, in order.
my @phrases = qw(¨º ¤H «o¦b ¿O¤õ Áñ¬À ³B ¯qµo ·N¿³Áñ¬À);
while (defined(my $fragment = <$token>)) {
    while (defined(my $phrase = <$fragment>)) {
        ok("$phrase", shift(@phrases), 'Iteration');
    }
}

1;