#!/usr/bin/perl -w
use strict;
$^W = 1;
use Socket qw (inet_aton inet_ntoa);
my $DEBUG = 0;
my %range;
my $range_count = 0;
my $log2 = log(2);
my @mask;
my @mask_packed;
my %mask_decimal;
for (my $i=0; $i<=31; $i++){
$mask[$i] = pack('B32', ('1'x(32-$i)).('0'x$i));
$mask_packed[$i] = pack('C',$i);
$mask_decimal{pack('C',$i)} = $i;
}
my $ip_match = qr/^(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])$/o;
my $reg_dir = './';
# 'SPECIAL' IP RANGES (all from RFC3330)
# a double asterix '**' indicates a local (non-public) IP ranges,
# whereas a double dash '--' indicates some other IANA range
# 'Blanket' coverage, with high handicap (ensure complete coverage)
# note that the handicap is less than the default, to ensure that
# no part is overwritten by RIPE database, which contains IANA ranges
insert_raw(unpack('N',inet_aton('0.0.0.0')),2**32,'--',2**31);
# "This" Network [RFC1700, page 4]
insert_raw(unpack('N',inet_aton('0.0.0.0')),2**24,'**',2**24);
# Private-Use Networks [RFC1918]
insert_raw(unpack('N',inet_aton('10.0.0.0')),2**24,'**',2**24);
# Public Data Networks [RFC1700, page 181]
insert_raw(unpack('N',inet_aton('14.0.0.0')),2**24,'--',2**24);
# Loopback [RFC1700, page 5]
insert_raw(unpack('N',inet_aton('127.0.0.0')),2**24,'**',2**24);
# Link Local
insert_raw(unpack('N',inet_aton('169.254.0.0')),2**16,'**',2**16);
# Private-Use Networks [RFC1918]
insert_raw(unpack('N',inet_aton('172.16.0.0')),2**20,'**',2**20);
# Test-Net
insert_raw(unpack('N',inet_aton('192.0.2.0')),2**8,'**',2**8);
# 6to4 Relay Anycast [RFC3068]
insert_raw(unpack('N',inet_aton('192.88.99.0')),2**8,'**',2**8);
# Private-Use Networks [RFC1918]
insert_raw(unpack('N',inet_aton('192.168.0.0')),2**16,'**',2**16);
# Network Interconnect Device Benchmark Testing [RFC2544]
insert_raw(unpack('N',inet_aton('198.18.0.0')),2**17,'--',2**17);
# Multicast [RFC3171]
insert_raw(unpack('N',inet_aton('224.0.0.0')),2**28,'--',2**28);
# Reserved for Future Use [RFC1700, page 4]
insert_raw(unpack('N',inet_aton('240.0.0.0')),2**28,'--',2**28);
read_reg('delegated-afrinic-latest');
read_reg('delegated-lacnic-latest');
read_reg('delegated-apnic-latest');
read_ripe();
read_reg('delegated-arin-latest');
join_neighbours();
punch_holes();
optimize();
output();
sub output
{
open(OUTFILE,"> sorted_countries.txt") or die ($!);
foreach my $key (sort keys %range){
print OUTFILE inet_ntoa(substr($key,0,4)) . '|';
print OUTFILE 2 ** unpack('C',substr($key,4,1)) .'|';
print OUTFILE $range{$key}->{cc} ."\n";
}
close OUTFILE;
}
sub formatRange
{
my ($start,$end,$cc) = @_;
my $ip = pack('N',$start);
my $size = ($end - $start) + 1;
while (1){
my $mask = int(log($size)/log(2));
my $max_mask = get_max_mask($ip);
if ($max_mask < $mask){
$mask = $max_mask;
}
print OUTFILE inet_ntoa($ip).'|'. 2 ** $mask .'|'. $cc ."\n";
$size = $size - (2 ** $mask);
return unless ($size > 0);
$ip = pack('N',(unpack('N',$ip) + 2 ** $mask));
}
}
sub optimize
{
print STDERR "performing final optimizations\n";
my $repeat = 1;
while ($repeat){
$repeat = 0;
my @key = sort keys %range;
my $one = $key[0];
for (my $i = 1; $i<=$#key; $i++){
my $two = $key[$i];
if (exists $range{$one}){
my $one_mask_decimal = $mask_decimal{substr($one,4,1)};
my $two_mask_decimal = $mask_decimal{substr($two,4,1)};
if (($one_mask_decimal == $two_mask_decimal) &&
($range{$one}->{cc} eq $range{$two}->{cc})){
my $one_ip_packed = substr($one,0,4);
my $two_ip_packed = substr($two,0,4);
if (($one_ip_packed & $mask[$one_mask_decimal + 1]) eq ($two_ip_packed & $mask[$two_mask_decimal + 1])){
my $one_ip_decimal = unpack('N',substr($one,0,4));
my $two_ip_decimal = unpack('N',substr($two,0,4));
insert_raw($one_ip_decimal, 2 ** ($one_mask_decimal + 1), $range{$one}->{cc}, $range{$one}->{handicap});
delete $range{$one};
delete $range{$two};
$repeat++;
}
}
}
$one = $two;
}
print STDERR " repeating ($repeat joins)\n" if $repeat;
}
}
sub join_neighbours
{
print STDERR "optimizing by joining neighbouring ranges\n";
my $repeat = 1;
while ($repeat){
$repeat = 0;
my @key = sort keys %range;
my $one = $key[0];
for (my $i = 1; $i<=$#key; $i++){
my $two = $key[$i];
if (exists $range{$one}){
my $one_mask_decimal = $mask_decimal{substr($one,4,1)};
my $two_mask_decimal = $mask_decimal{substr($two,4,1)};
if (($one_mask_decimal == $two_mask_decimal) &&
($range{$one}->{cc} eq $range{$two}->{cc}) &&
($range{$one}->{handicap} eq $range{$two}->{handicap})){
my $one_ip_packed = substr($one,0,4);
my $two_ip_packed = substr($two,0,4);
if (($one_ip_packed & $mask[$one_mask_decimal + 1]) eq ($two_ip_packed & $mask[$two_mask_decimal + 1])){
my $one_ip_decimal = unpack('N',substr($one,0,4));
my $two_ip_decimal = unpack('N',substr($two,0,4));
insert_raw($one_ip_decimal, 2 ** ($one_mask_decimal + 1), $range{$one}->{cc}, $range{$one}->{handicap});
delete $range{$one};
delete $range{$two};
$repeat++;
}
}
}
$one = $two;
}
print STDERR " repeating ($repeat joins)\n" if $repeat;
}
}
sub punch_holes
{
print STDERR "removing overlapping ranges\n";
my $repeat = 1;
while ($repeat) {
$repeat = 0;
foreach my $one (keys %range){
next unless (exists $range{$one}); # we're deleting, so need to be careful
if (defined (my $two = get_existing_range($one))){
$repeat++;
if ($DEBUG){
print STDERR ">>>>MATCH: ". inet_ntoa(substr($one,0,4)).'/'.unpack('C',substr($one,4,1));
print STDERR " and ". inet_ntoa(substr($two,0,4)).'/'.unpack('C',substr($two,4,1));
print STDERR "\n";
}
my $one_mask_decimal = $mask_decimal{substr($one,4,1)};
my $two_mask_decimal = $mask_decimal{substr($two,4,1)};
if ($one_mask_decimal > $two_mask_decimal){
punch_hole($one,$two);
} else {
punch_hole($two,$one);
}
}
}
print STDERR " repeating ($repeat overlapping ranges)\n" if $repeat;
}
}
sub punch_hole
{
my ($larger,$smaller) = @_;
my $larger_mask_decimal = $mask_decimal{substr($larger,4,1)};
my $smaller_mask_decimal = $mask_decimal{substr($smaller,4,1)};
my $larger_handicap = $range{$larger}->{handicap};
my $smaller_handicap = $range{$smaller}->{handicap};
if ($larger_handicap <= $smaller_handicap){
# $larger is less handicapped, therefore $smaller
# is deleted
if ($DEBUG){
print STDERR ">>>>removing ". inet_ntoa(substr($smaller,0,4)).'/'.unpack('C',substr($smaller,4,1));
print STDERR " in favour of ". inet_ntoa(substr($larger,0,4)).'/'.unpack('C',substr($larger,4,1));
print STDERR "\n";
}
delete $range{$smaller};
$range_count--;
} else {
# $smaller is less handicapped, therefore a hole
# should be cut for it in $larger
my $larger_cc = $range{$larger}->{cc};
if ($DEBUG){
print STDERR ">>>>deleting: ". inet_ntoa(substr($larger,0,4)).'/'.unpack('C',substr($larger,4,1));
print STDERR "\n";
}
delete $range{$larger};
$range_count--;
my $larger_ip_packed = substr($larger,0,4);
my $larger_ip_decimal_start = unpack('N',$larger_ip_packed);
my $smaller_ip_packed = substr($smaller,0,4);
my $smaller_ip_decimal_start = unpack('N',$smaller_ip_packed);
if ($larger_ip_decimal_start < $smaller_ip_decimal_start){
if ($DEBUG){
print STDERR ">>>>creating: ". inet_ntoa(pack('N',$larger_ip_decimal_start)).'/'.
int(log($smaller_ip_decimal_start-$larger_ip_decimal_start)/$log2);
print STDERR "\n";
}
insert_raw($larger_ip_decimal_start,$smaller_ip_decimal_start-$larger_ip_decimal_start,$larger_cc,$larger_handicap);
}
my $larger_ip_decimal_end = $larger_ip_decimal_start + (2 ** $larger_mask_decimal) - 1;
my $smaller_ip_decimal_end = $smaller_ip_decimal_start + (2 ** $smaller_mask_decimal) - 1;
if ($larger_ip_decimal_end > $smaller_ip_decimal_end){
if ($DEBUG){
print STDERR ">>>>creating: ". inet_ntoa(pack('N',$smaller_ip_decimal_end+1)).'/'.
int(log($larger_ip_decimal_end-$smaller_ip_decimal_end)/$log2);
print STDERR "\n";
}
insert_raw($smaller_ip_decimal_end+1,$larger_ip_decimal_end-$smaller_ip_decimal_end,$larger_cc,$larger_handicap);
}
}
}
sub get_existing_range
{
my $key = shift;
my $ip_packed = substr($key,0,4);
for (my $i = 31; $i>=0; $i--){
my $existing_key = ($ip_packed & $mask[$i]) . $mask_packed[$i];
next if $existing_key eq $key;
if (exists $range{$existing_key}){
return $existing_key;
}
}
return undef;
}
sub insert_raw
{
my ($ip_decimal,$size,$cc,$handicap) = @_;
while ($size > 0){
my $ip_packed = pack('N',$ip_decimal);
my $max_mask = get_max_mask($ip_packed);
if ((2 ** $max_mask) > $size){
$max_mask = int(log($size)/$log2);
}
add_range($ip_packed,$mask_packed[$max_mask],$cc,$handicap);
$ip_decimal += (2 ** $max_mask);
$size -= (2 ** $max_mask);
}
}
sub add_range
{
my ($ip_packed, $mask_packed, $cc, $handicap) = @_;
my $key = $ip_packed . $mask_packed;
if (exists $range{$key}){
my $existing = $range{$key};
if ($existing->{handicap} > $handicap){
$range{$ip_packed . $mask_packed} = {cc => $cc, handicap => $handicap};
$range_count++;
}
} else {
$range{$ip_packed . $mask_packed} = {cc => $cc, handicap => $handicap};
$range_count++;
}
}
sub get_max_mask
{
my $ip_packed = shift;
for (my $i = 31; $i>=0; $i--){
return $i
if (($ip_packed | $mask[$i]) eq $mask[$i]);
}
die("strange IP: ". inet_ntoa($ip_packed));
}
sub read_ripe
{
print STDERR "loading data from ripe.db.inetnum\n";
my $ripe_inet_line = qr/^inetnum:\s+(\S+)\s*-\s*(\S+)/o;
my $ripe_cc_line = qr/^country:\s+(\S\S)/o;
open (REG,"< $reg_dir/ripe.db.inetnum") or die("can't open $reg_dir/ripe.db.inetnum: $!");
binmode REG, ':crlf';
{
my $start;
my $end;
my $cc;
my $status;
while (my $line = <REG>){
if (defined $start){
next unless $line =~ $ripe_cc_line;
$cc = uc $1;
$cc = 'GB' if ($cc eq 'UK');
insert_raw($start,$end-$start+1,$cc,$end-$start+1);
$start = undef;
$end = undef;
} elsif ($line =~ $ripe_inet_line){
my ($a_start,$a_end) = ($1,$2);
if ($a_start =~ $ip_match){
$start = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4;
}
if ($a_end =~ $ip_match){
$end = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4;
}
die($line) unless ((defined $start) && (defined $end));
} else {
}
}
}
close REG || warn("can't close $reg_dir/ripe.db.inetnum, but continuing: $!");
}
sub read_reg
{
my $path = shift;
open (REG, "< $reg_dir/$path") || die("can't open $reg_dir/$path: $!");
binmode REG, ':crlf';
print STDERR "loading data from $path\n";
my $stat_line = qr/^([^\|]+)\|(..)\|ipv4\|([^\|]+)\|(\d+)\|/o;
while (my $line = <REG>){
chomp $line;
next unless $line =~ $stat_line;
my ($auth,$cc,$ip,$size) = ($1,uc $2,$3,$4);
next unless ($ip =~ $ip_match);
my $start = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4;
$cc = 'GB' if ($cc eq 'UK');
insert_raw($start,$size,$cc,$size);
}
close REG || warn("can't close $reg_dir/$path, but continuing: $!");
}