The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

open(RFC, "rfc1345.txt") || die;

my %mnemonics;
my %fullname;
my %aliases;

my $charset;
my @aliases;
my $code;
my @map;

unless (-d "maps") {
    print STDERR "Making directory 'maps'\n";
    mkdir('maps', 0777) || die "Can't create directory: $!";
}

if (open(ALIASES, "maps/aliases")) {
    while (<ALIASES>) {
	next if /^\s*\#/;
	next if /^\s*$/;
	chomp;
	my($charset, @aliases) = split(' ', $_);
	$aliases{$charset} = { map {$_ => 1} @aliases };
    }
}

while (<RFC>) {
    if (/^3\.\s/ .. /^4\.\s/) {  # only want chapter 3
	if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) {
	    $mnemonics{$1} = $2;
	    $fullname{$3}  = $2;
	}
    }
    if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) {
	if (/^  &charset\s+(\S+)/) {
	    #print "$1\n";
	    map_out();
	    $charset = $1;
	    @aliases = ();
	    undef($code);
	    @map = ();
	} elsif (/^  &alias\s+(\S+)/) {
	    #print " $1\n";
	    push(@aliases, $1);
	} elsif (/^  &bits\s+(\d+)/) {
	    #print " BITS = $1\n";
	    if ($1 ne '8') {
		undef($charset);  # don't care about this one
	    }
	} elsif (/^  &code\s+(\d+)/) {
	    #print " CODE=$1\n";
	    $code = $1;
	} elsif (/^  &duplicate\s+(\d+)\s+(\S+)/) {
	    #print "DUP $1 $2\n";
	    push(@map, [$1, $2]);
	} elsif (/^  &([a-z][a-z0-9]+)/) {
	    #print "$1\n";
	} elsif (/^  (\S+ +.*)/ && $charset && defined($code)) {
	    my $mne;
	    for $mne (split(' ', $1)) {
		if ($mne eq "??") {
		    $code++;
		    next;
		} else {
		    if ($code > 255) {
			print STDERR "$charset: bad code $code\n";
			undef($charset);  # ignore it
			last;
		    }
		    push(@map, [$code++, $mne]);
		}
	    }
	}
    }
}
map_out();

open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!";
for (sort keys %aliases) {
    delete $aliases{$_}{$_};  # if we managed to get an alias to ourself
    print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n";
}
close(ALIASES);



sub map_out
{
    return unless $charset;

    while ($charset =~ /[^\w\-\.]/) {
	my $orig = $charset;
	if ($charset =~ s/:\d+$// ||
	    $charset =~ s/_\(\d+\)$//)
	{
	    push(@aliases, $orig);
	} else {
            die "Can't wash $charset\n";
        }
    }
    if ($charset =~ /^ISO_8859-(\d+)$/) {
	push(@aliases, "iso8859-$1", "8859-$1");
	# Fix the MACRON vs. OVERLINE bug in these encodings
	if ($1 == 1 || $1 == 4 || $1 == 9) {
	    splice(@map, 0xAF, 0, [0xAF, "'m"]);
	}
    }

    print STDERR "$charset @aliases\n";
    for (@aliases) {
	$aliases{$charset}{$_}++;
    }

    open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die;
    binmode BINMAP;
    for (@map) {
	my($code, $mne) = @$_;
	my $x4 = $mnemonics{$mne};
	unless ($x4) {
	    print STDERR "$charset: no code for $mne\n";
	    next;
	}
	if ($code < 0 || $code > 255) {
	    print STDERR "$charset: bad code $code\n";
	    next;
	}
	printf BINMAP "0x%02x 0x%s\n", $code, $x4;
    }
    close(BINMAP);
    
}