#!/opt/bin/perl
# creates lib/AnyEvent/Util/uts46.pl - better do not run it!
use common::sense;
use utf8;
no warnings 'utf8';
binmode STDOUT, ":utf8";
open my $fh, "GET http://www.unicode.org/Public/idna/9.0.0/IdnaMappingTable.txt |"
or die;
my $valid;
my $imap; # index map \x00 char replacement
while (<$fh>) {
next unless /^[0-9A-F]/;
/^
([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )?
\s*;\s*(\S+)
(?: \s*;\s*([0-9A-F ]+?) )?
(?: \s*;[^;]+ )?
\s*
(?: \#.* )?
$
/x or die "$_: unparsable";
my ($r1, $r2, $type, $map) = (hex $1, hex $2, $3, $4);
my $R1 = chr $r1;
my $R2 = chr $r2;
$map = join "", map chr hex, split ' ', $map;
$type = "valid" if $type eq "deviation"; # use non-transitional behaviour for deviation characters
given ($type) {
when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) {
# nop
}
when (/^(?:mapped|deviation|ignored)$/) {
$map = "\x01$map" if $type eq "deviation";
$imap .= "\x00" . chr . $map
for $r1 .. $r2 || $r1;
}
when (/^(?:valid)$/) {
(vec $valid, $_, 1) = 1
for $r1 .. $r2 || $r1;
}
default {
die "default: $R1,$R2,$type,$map;\n";
}
}
}
open my $fh, ">lib/AnyEvent/Util/uts46data.pl"
or die;
binmode $fh, ":perlio";
print $fh "# autogenerated by util/gen_uts46data\n";
utf8::encode $imap;
0 > index $imap, "\x02" # it's not supposed to be anywhere in there
or die "imap contains \\x02";
print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n";
# try to find a valid quoting character - there usually are many legal combos
for (1..127) { # stay out of utf-8 range
if (0 >= index $valid, chr) {
print $fh "\$uts46_valid = q", chr, $valid, chr, ";\n";
goto valid_ok;
}
}
die "unable to found valid quoting character";
valid_ok:;
print $fh "1;\n";
close $fh;