#!/usr/bin/perl -w
open(RFC, "rfc1345.txt") || die;
my %mnemonics;
my %fullname;
my %aliases;
my $charset;
my @aliases;
my $code;
my @map;
unless (-d "maps") {
print STDERR "Making directory 'maps'\n";
mkdir('maps', 0777) || die "Can't create directory: $!";
}
if (open(ALIASES, "maps/aliases")) {
while (<ALIASES>) {
next if /^\s*\#/;
next if /^\s*$/;
chomp;
my($charset, @aliases) = split(' ', $_);
$aliases{$charset} = { map {$_ => 1} @aliases };
}
}
while (<RFC>) {
if (/^3\.\s/ .. /^4\.\s/) { # only want chapter 3
if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) {
$mnemonics{$1} = $2;
$fullname{$3} = $2;
}
}
if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) {
if (/^ &charset\s+(\S+)/) {
#print "$1\n";
map_out();
$charset = $1;
@aliases = ();
undef($code);
@map = ();
} elsif (/^ &alias\s+(\S+)/) {
#print " $1\n";
push(@aliases, $1);
} elsif (/^ &bits\s+(\d+)/) {
#print " BITS = $1\n";
if ($1 ne '8') {
undef($charset); # don't care about this one
}
} elsif (/^ &code\s+(\d+)/) {
#print " CODE=$1\n";
$code = $1;
} elsif (/^ &duplicate\s+(\d+)\s+(\S+)/) {
#print "DUP $1 $2\n";
push(@map, [$1, $2]);
} elsif (/^ &([a-z][a-z0-9]+)/) {
#print "$1\n";
} elsif (/^ (\S+ +.*)/ && $charset && defined($code)) {
my $mne;
for $mne (split(' ', $1)) {
if ($mne eq "??") {
$code++;
next;
} else {
if ($code > 255) {
print STDERR "$charset: bad code $code\n";
undef($charset); # ignore it
last;
}
push(@map, [$code++, $mne]);
}
}
}
}
}
map_out();
open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!";
for (sort keys %aliases) {
delete $aliases{$_}{$_}; # if we managed to get an alias to ourself
print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n";
}
close(ALIASES);
sub map_out
{
return unless $charset;
while ($charset =~ /[^\w\-\.]/) {
my $orig = $charset;
if ($charset =~ s/:\d+$// ||
$charset =~ s/_\(\d+\)$//)
{
push(@aliases, $orig);
} else {
die "Can't wash $charset\n";
}
}
if ($charset =~ /^ISO_8859-(\d+)$/) {
push(@aliases, "iso8859-$1", "8859-$1");
# Fix the MACRON vs. OVERLINE bug in these encodings
if ($1 == 1 || $1 == 4 || $1 == 9) {
splice(@map, 0xAF, 0, [0xAF, "'m"]);
}
}
print STDERR "$charset @aliases\n";
for (@aliases) {
$aliases{$charset}{$_}++;
}
open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die;
binmode BINMAP;
for (@map) {
my($code, $mne) = @$_;
my $x4 = $mnemonics{$mne};
unless ($x4) {
print STDERR "$charset: no code for $mne\n";
next;
}
if ($code < 0 || $code > 255) {
print STDERR "$charset: bad code $code\n";
next;
}
printf BINMAP "0x%02x 0x%s\n", $code, $x4;
}
close(BINMAP);
}