The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl
#
# This mkheader script makes two C header files,
# $FmH_File and $ToH_File (see below their values).
# These files are used to build Lingua::KO::MacKorean.
#
use 5.006001;
use Carp;
use strict;
use warnings;

my $MapFile  = "korean.map";
my $AddFile  = "addition.map";
my $EncName  = "macko";

###################

my $TypeSC = "struct mbc_contra";
my $TypeBC = 'STDCHAR'; # byte string
my $TypeMC = 'U16';     # multibyte char
my $TypeWC = 'U16';     # Unicode scalar value
my $TypeSL = 'U8';      # length of Unicode sequence from a multibyte char

my $FmH_File = "fm${EncName}.h";
my $ToH_File = "to${EncName}.h";

my (%FmMbc, %ToMbc, %ToMbcC, %Contra);

sub qstring {
    return sprintf '"%s"', join '', map sprintf("\\x%02x", $_), @_;
}

sub split_into_char {
    use bytes;
    my $uni = pack('U*', @_);
    my $len = length($uni);
    my @ary;
    for(my $i = 0; $i < $len; ++$i) {
	push @ary, ord(substr($uni,$i,1));
    }
    return @ary;
}

###################

for my $f ($AddFile, $MapFile) {
    open IN, "<$f" or die "$f $!";
    binmode IN;

    while (<IN>) {
	next if /^#/;
	next if /^\s*$/;

	my @t = split;
	my $mc = hex $t[0];
	next if ! $t[1];

	my @uv = map hex, split /\+/, $t[1];

	for my $u (@uv) {
	    $u > 0xffff and die "$u > 0xffff in $u";
	}

	if ($f eq $MapFile) {
	    my($lb,$tb) = unpack('CC', pack 'n', $mc);
	    $FmMbc{$lb}{$tb} = [ @uv ];
	}

	if (@uv == 1) {
	    my($row,$cel) = unpack('CC', pack 'n', $uv[0]);
	    $ToMbc{$row}{$cel} = $mc;
	}
	else {
	    my $base = shift @uv;
	    my($row,$cel) = unpack('CC', pack 'n', $base);
	    $ToMbcC{$row}{$cel} ++;
	    push @{ $Contra{$base} }, [ $mc, split_into_char(@uv) ];
	}
    }
    close IN or die "$f can't be closed.\n";
}

###################

open FM, ">$FmH_File" or die "$FmH_File $!" or die;
binmode FM;

foreach my $lb (sort { $a <=> $b } keys %FmMbc) {
    print FM "$TypeBC* fm_${EncName}_${lb} [256] = {\n";
    for (my $tb = 0; $tb < 256; $tb++) {
	my @uv = defined $FmMbc{$lb}{$tb} ? @{ $FmMbc{$lb}{$tb} } : ();
	my @c = split_into_char(@uv);
	my $str = qstring(@c);
	my $len = @c;
	print FM @uv ? "\t($TypeBC*)$str" : "\tNULL";
	print FM ','  if $tb != 255;
	print FM "\n" if $tb % 8 == 7;
    }
    print FM "};\n\n";
}

print FM "$TypeBC** fm_${EncName} [256] = {\n";
for (my $lb = 0; $lb < 256; $lb++) {
    print FM defined $FmMbc{$lb} ? "fm_${EncName}_$lb" : "NULL";
    print FM ','  if $lb != 255;
    print FM "\n" if $lb % 4 == 3;
}
print FM "};\n\n";

close FM or die "$FmH_File can't be closed.\n";

###################

open TO, ">$ToH_File" or die "$ToH_File $!" or die;
binmode TO;

print TO "$TypeSC { $TypeSL len; $TypeBC* string; $TypeMC mchar; };\n\n";

foreach my $uv (sort { $a <=> $b } keys %Contra) {
    my @list = sort { @$b <=> @$a } @{ $Contra{$uv} };
			# ordered from longest

    print TO "$TypeSC to_${EncName}_u${uv}_contra [] = {\n";
    foreach my $ele (@list) {
	my ($mc, @c) = @$ele;
	my $str = qstring(@c);
	my $len = @c;
	print TO "\t{ ($TypeSL)$len, ($TypeBC*)$str, ($TypeMC)$mc },\n";
    }
    print TO "{0,NULL,0}\n};\n\n";
}

foreach my $suffix ("", "_contra") {
    my $hash = $suffix ?  \%ToMbcC : \%ToMbc;
    my $type = $suffix ? "$TypeSC*" : $TypeMC;

    foreach my $row (sort { $a <=> $b } keys %$hash) {
	print TO "$type to_${EncName}_${row}${suffix} [256] = {\n";
	for (my $cel = 0; $cel < 256; $cel++) {
	    my $uv = $row * 256 + $cel;
	    if ($suffix) {
		printf TO "\t%s",
		    defined $hash->{$row}{$cel}
			? "to_${EncName}_u${uv}_contra"
			: "NULL";
	    } else {
		printf TO "\t%d",
		    defined $hash->{$row}{$cel} ? $hash->{$row}{$cel} : 0;
	    }
	    print TO ','  if $cel != 255;
	    print TO "\n" if $cel % 8 == 7;
	}
	print TO "};\n\n";
    }

    print TO "$type* to_${EncName}${suffix} [256] = {\n";
    for (my $row = 0; $row < 256; $row++) {
	print TO "\t", defined $hash->{$row}
	    ? "to_${EncName}_${row}${suffix}" : "NULL";
	print TO ','  if $row != 255;
	print TO "\n" if $row % 8 == 7;
    }
    print TO "};\n\n\n";
}

close TO or die "$ToH_File can't be closed.\n";

1;
__END__