#!/usr/bin/perl -w
#
# t/verify_sjis_ucs2.pl
#
# sjis=>ucs2¤Èucs2=>sjis¤ÎÁ´Ê¸»ú¥Æ¥¹¥È
# XS¦¤À¤±¤Î¤Æ¤¹¤È¡Ä.
#
# $ sh runtest.sh t/verify_sjis_ucs2.pl
#
# all sjis(0x0000-0xFFFF) => ucs2
# all ucs2(0x0000-0xFFFF) => sjis
#
use strict;
#BEGIN{$Unicode::Japanese::PurePerl = 1;}
use Unicode::Japanese;
use IO::File;
print "loading Uni::Jp\n";
Unicode::Japanese->new('');
my $msg = $Unicode::Japanese::xs_loaderror;
print "xs-load-message : [".(defined($msg)?$msg:'')."]".(!defined($msg)?' (undef)':$msg eq ''?' (empty)':'')."\n";
my $tablefh = new IO::File 'jcode/CP932.TXT'
or die "cannot open 'jcode/CP932.TXT'";
print "reading 'jcode/CP932.TXT'...\n";
my(%s2u,%u2s);
while(<$tablefh>)
{
next if(m/^#/);
next if(m/^$/);
chomp;
m/^0x([0-9a-fA-F]+)\s+(?:0x([0-9a-fA-F]+))?/ or die $_;
next if(!defined($2));
$s2u{hex($1)} = hex($2);
# CP932 Unicode
}
%u2s = reverse(%s2u);
$| = 1;
# --------------------------------------------------------------------
# ÉÔ°ìÃ×»þ¤Ë½ÐÎϤ¹¤ëÍÑ
sub dumpstr($$)
{
my($hdr,$str)=@_;
my $line = $hdr.sprintf(" : [len:%d]",length($str));
for( my $i=0; $i<length($str); ++$i )
{
$line .= sprintf(" %02x",unpack('C',substr($str,$i,1)));
}
print STDERR $line." : $str\r\n";
}
# --------------------------------------------------------------------
# tests sjis to ucs2
print "Testing sjis=>ucs2...\n";
test_sjis_ucs2();
sub upack
{
pack('n',shift);
}
sub sjis_ucs2
{
my $code = shift;
my $str = $code<=0xFF?pack("C",$code):pack('n',$code);
exists($s2u{$code}) ? upack($s2u{$code}) :
$code<=0xFF ? "\0?" :
$str =~ /^[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]$/ ? "\0?" :
(
(exists($s2u{$code>>8}) ? upack($s2u{$code>>8}) : "\0?").
(exists($s2u{$code&255}) ? upack($s2u{$code&255}) : "\0?")
)
}
sub test_sjis_ucs2
{
printf "[0x%#04x]",0;
for( my $i=0x0; $i<=0xffff; ++$i )
{
if( ($i&0xFF)==0 && $i)
{
if( ($i&0x3FFF)==0 )
{
printf "\n[%#06x]",$i;
}else
{
print '.';
}
}
my $src = pack($i<=0xff?'c':'n',$i);
my $code = $i;
my $xs = Unicode::Japanese->new($src,'sjis')->ucs2();
my $test = sjis_ucs2($code);
if( $xs ne $test )
{
print STDERR "\n";
print STDERR "<<sjis=>utf8>>\n";
print STDERR "i : $i\n";
dumpstr('sjis',$src);
dumpstr('xs ',$xs);
dumpstr('test',$test);
exit;
}
}
print "\n";
}
# --------------------------------------------------------------------
# tests ucs2 to sjis
print "Testing ucs2=>sjis...\n";
test_ucs2_sjis();
sub spack
{
my $code = shift;
$code <= 0xFF ? pack('C',$code) : pack('n',$code);
}
sub ucs2_sjis
{
my $code = shift;
exists($u2s{$code}) ? spack($u2s{$code}) :
$code<=0x7F ? chr($code) :
'&#'.$code.';';
}
sub test_ucs2_sjis
{
printf "[0x%#04x]",0;
for( my $i=0x0; $i<=0xffff; ++$i )
{
if( ($i&0xFF)==0 && $i)
{
if( ($i&0x3FFF)==0 )
{
printf "\n[%#06x]",$i;
}else
{
print '.';
}
}
my $code = $i;
my $ucs2 = pack('n',$code);
my $xs = Unicode::Japanese->new($ucs2,'ucs2')->sjis();
my $test = ucs2_sjis($code);
if( $xs ne $test )
{
print STDERR "\n";
print STDERR "<<utf8=>sjis>>\n";
printf STDERR "i : 0x%04x\n",$i;
dumpstr('ucs2',$ucs2);
dumpstr('xs ',$xs);
dumpstr('test',$test);
exit;
}
}
print "\n";
}
# --------------------------------------------------------------------
# done
print "done\n";
# --------------------------------------------------------------------
# PurePerl code, copy from String.pl
#
use vars qw(@U2T);
sub _ucs2_utf8 {
my $this = shift;
my $str = shift;
if(!defined($str))
{
return '';
}
my $result = '';
for my $uc (unpack("n*", $str))
{
$result .= $U2T[$uc] ? $U2T[$uc] :
($U2T[$uc] = ($uc < 0x80) ? chr($uc) :
($uc < 0x800) ? chr(0xC0 | ($uc >> 6)) . chr(0x80 | ($uc & 0x3F)) :
chr(0xE0 | ($uc >> 12)) . chr(0x80 | (($uc >> 6) & 0x3F)) .
chr(0x80 | ($uc & 0x3F)));
}
$result;
}