The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/opt/bin/perl

# creates lib/AnyEvent/Util/uts46.pl - better do not run it!

use common::sense;
use utf8;
no warnings 'utf8';

binmode STDOUT, ":utf8";

open my $fh, "GET http://www.unicode.org/Public/idna/9.0.0/IdnaMappingTable.txt |"
   or die;

my $valid;
my $imap;   # index map \x00 char replacement

while (<$fh>) {
   next unless /^[0-9A-F]/;

   /^
    ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )?
    \s*;\s*(\S+)
    (?: \s*;\s*([0-9A-F ]+?) )?
    (?: \s*;[^;]+ )?
    \s*
    (?: \#.* )?
    $
   /x or die "$_: unparsable";

   my ($r1, $r2, $type, $map) = (hex $1, hex $2, $3, $4);

   my $R1 = chr $r1;
   my $R2 = chr $r2;

   $map = join "", map chr hex, split ' ', $map;

   $type = "valid" if $type eq "deviation"; # use non-transitional behaviour for deviation characters

   given ($type) {
      when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) {
         # nop
      }
      when (/^(?:mapped|deviation|ignored)$/) {
         $map = "\x01$map" if $type eq "deviation";

         $imap .= "\x00" . chr . $map
            for $r1 .. $r2 || $r1;
      }
      when (/^(?:valid)$/) {
         (vec $valid, $_, 1) = 1
            for $r1 .. $r2 || $r1;
      }
      default {
         die "default: $R1,$R2,$type,$map;\n";
      }
   }
}

open my $fh, ">lib/AnyEvent/Util/uts46data.pl"
   or die;
binmode $fh, ":perlio";
print $fh "# autogenerated by util/gen_uts46data\n";

utf8::encode $imap;
0 > index $imap, "\x02" # it's not supposed to be anywhere in there
   or die "imap contains \\x02";
print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n";

# try to find a valid quoting character - there usually are many legal combos
for (1..127) { # stay out of utf-8 range
   if (0 >= index $valid, chr) {
      print $fh "\$uts46_valid = q", chr, $valid, chr, ";\n";
      goto valid_ok;
   }
}
die "unable to found valid quoting character";
valid_ok:;

print $fh "1;\n";
close $fh;