#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use utf8;
use Carp qw( croak );
use Cwd qw( abs_path );
use File::Basename qw( dirname );
use JSON::XS;
use Math::Int128 qw( uint128 );
use MaxMind::DB::Writer::Serializer 0.050000;
use MaxMind::DB::Writer::Tree 0.050000;
use Net::Works::Network;
use Test::MaxMind::DB::Common::Util qw( standard_test_metadata );
my $Dir = dirname( abs_path($0) );
sub main {
my @sizes = ( 24, 28, 32 );
my @ipv4_range = ( '1.1.1.1', '1.1.1.32' );
my @ipv4_subnets = Net::Works::Network->range_as_subnets(@ipv4_range);
for my $record_size (@sizes) {
write_test_db(
$record_size,
\@ipv4_subnets,
{ ip_version => 4 },
'ipv4',
);
}
write_broken_pointers_test_db(
24,
\@ipv4_subnets,
{ ip_version => 4 },
'broken-pointers',
);
my @ipv6_subnets = Net::Works::Network->range_as_subnets(
'::1:ffff:ffff',
'::2:0000:0059'
);
for my $record_size (@sizes) {
write_test_db(
$record_size,
\@ipv6_subnets,
{ ip_version => 6 },
'ipv6',
);
write_test_db(
$record_size,
[
@ipv6_subnets,
Net::Works::Network->range_as_subnets( @ipv4_range, 6 ),
],
{ ip_version => 6 },
'mixed',
);
}
write_decoder_test_db();
write_deeply_nested_structures_db();
write_geoip2_city_db();
write_broken_geoip2_city_db();
write_no_ipv4_tree_db();
write_no_map_db(\@ipv4_subnets);
write_test_serialization_data();
}
sub write_broken_pointers_test_db {
no warnings 'redefine';
my $orig_store_data = MaxMind::DB::Writer::Serializer->can('store_data');
# This breaks the value of the record for the 1.1.1.32 network, causing it
# to point outside the database.
local *MaxMind::DB::Writer::Serializer::store_data = sub {
my $data_pointer = shift->$orig_store_data(@_);
my $value = $_[1];
if ( ref($value) eq 'HASH'
&& exists $value->{ip}
&& $value->{ip} eq '1.1.1.32' ) {
$data_pointer += 100_000;
}
return $data_pointer;
};
# The next hack will poison the data section for the 1.1.16/28 subnet
# value. It's value will be a pointer that resolves to an offset outside
# the database.
my $key_to_poison = 'evbclUZX3mjlqxDihL+JNr+276s';
my $orig_position_for_data
= MaxMind::DB::Writer::Serializer->can('_position_for_data');
local *MaxMind::DB::Writer::Serializer::_position_for_data = sub {
my $key = $_[1];
if ( $key eq $key_to_poison ) {
return 1_000_000;
}
else {
return shift->$orig_position_for_data(@_);
}
};
write_test_db(@_);
}
sub write_test_db {
my $record_size = shift;
my $subnets = shift;
my $metadata = shift;
my $ip_version_name = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => $subnets->[0]->version(),
record_size => $record_size,
alias_ipv6_to_ipv4 => ( $subnets->[0]->version() == 6 ? 1 : 0 ),
map_key_type_callback => sub { 'utf8_string' },
standard_test_metadata(),
%{$metadata},
);
for my $subnet ( @{$subnets} ) {
$writer->insert_network(
$subnet,
{ ip => $subnet->first()->as_string() }
);
}
my $filename = sprintf(
"$Dir/MaxMind-DB-test-%s-%i.mmdb",
$ip_version_name,
$record_size,
);
open my $fh, '>', $filename;
$writer->write_tree($fh);
close $fh;
return ( $writer, $filename );
}
{
# We will store this once for each subnet so we will also be testing
# pointers, since the serializer will generate a pointer to this
# structure.
my %all_types = (
utf8_string => 'unicode! ☯ - ♫',
double => 42.123456,
bytes => pack( 'N', 42 ),
uint16 => 100,
uint32 => 2**28,
int32 => -1 * ( 2**28 ),
uint64 => uint128(1) << 60,
uint128 => uint128(1) << 120,
array => [ 1, 2, 3, ],
map => {
mapX => {
utf8_stringX => 'hello',
arrayX => [ 7, 8, 9 ],
},
},
boolean => 1,
float => 1.1,
);
my %all_types_0 = (
utf8_string => q{},
double => 0,
bytes => q{},
uint16 => 0,
uint32 => 0,
int32 => 0,
uint64 => uint128(0),
uint128 => uint128(0),
array => [],
map => {},
boolean => 0,
float => 0,
);
sub write_decoder_test_db {
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
database_type => 'MaxMind DB Decoder Test',
languages => ['en'],
description => {
en =>
'MaxMind DB Decoder Test database - contains every MaxMind DB data type',
},
alias_ipv6_to_ipv4 => 1,
map_key_type_callback => sub {
my $key = $_[0];
$key =~ s/X$//;
return $key eq 'array' ? [ 'array', 'uint32' ] : $key;
}
);
my @subnets
= map { Net::Works::Network->new_from_string( string => $_ ) }
qw(
::1.1.1.0/120
::2.2.0.0/112
::3.0.0.0/104
::4.5.6.7/128
abcd::/64
1000::1234:0000/112
);
for my $subnet (@subnets) {
$writer->insert_network(
$subnet,
\%all_types,
);
}
$writer->insert_network(
Net::Works::Network->new_from_string( string => '::0.0.0.0/128' ),
\%all_types_0,
);
open my $fh, '>', "$Dir/MaxMind-DB-test-decoder.mmdb";
$writer->write_tree($fh);
close $fh;
return;
}
}
{
my %nested = (
map1 => {
map2 => {
array => [
{
map3 => { a => 1, b => 2, c => 3 },
},
],
},
},
);
sub write_deeply_nested_structures_db {
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
ip_version => 6,
database_type => 'MaxMind DB Nested Data Structures',
languages => ['en'],
description => {
en =>
'MaxMind DB Nested Data Structures Test database - contains deeply nested map/array structures',
},
alias_ipv6_to_ipv4 => 1,
map_key_type_callback => sub {
my $key = shift;
return
$key =~ /^map/ ? 'map'
: $key eq 'array' ? [ 'array', 'map' ]
: 'uint32';
}
);
my @subnets
= map { Net::Works::Network->new_from_string( string => $_ ) }
qw(
::1.1.1.0/120
::2.2.0.0/112
::3.0.0.0/104
::4.5.6.7/128
abcd::/64
1000::1234:0000/112
);
for my $subnet (@subnets) {
$writer->insert_network(
$subnet,
\%nested,
);
}
open my $fh, '>', "$Dir/MaxMind-DB-test-nested.mmdb";
$writer->write_tree($fh);
close $fh;
return;
}
}
sub write_geoip2_city_db {
_write_geoip2_city_db('Test');
}
sub write_broken_geoip2_city_db {
no warnings 'redefine';
# This is how we _used_ to encode doubles. Storing them this way with the
# current reader tools can lead to weird errors. This broken database is a
# good way to test the robustness of reader code in the face of broken
# databases.
local *MaxMind::DB::Writer::Serializer::_encode_double = sub {
my $self = shift;
my $value = shift;
$self->_simple_encode( double => $value );
};
_write_geoip2_city_db('Test Broken Double Format');
}
{
my %type_map = (
cellular => 'uint16',
city => 'map',
continent => 'map',
country => 'map',
geoname_id => 'uint32',
is_anonymous_proxy => 'boolean',
is_satellite_provider => 'boolean',
latitude => 'double',
location => 'map',
longitude => 'double',
metro_code => 'uint16',
names => 'map',
postal => 'map',
registered_country => 'map',
represented_country => 'map',
subdivisions => [ 'array', 'map' ],
traits => 'map',
);
my $type_cb = sub { $type_map{ $_[0] } // 'utf8_string' };
sub _write_geoip2_city_db {
my $description = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 28,
ip_version => 6,
database_type => 'GeoIP2 City',
languages => [ 'en', 'zh' ],
description => {
en =>
"GeoIP2 City $description Database (a small sample of real GeoIP2 data)",
zh => '小型数据库',
},
alias_ipv6_to_ipv4 => 1,
map_key_type_callback => $type_cb,
);
my $json = JSON::XS->new()->utf8();
open my $source_fh, '<', "$Dir/../source-data/GeoIP2-City-Test.json";
while (<$source_fh>) {
my ( $network, $record ) = @{ $json->decode($_) };
$writer->insert_network(
Net::Works::Network->new_from_string( string => $network ),
$record
);
}
close $source_fh;
my $suffix = $description =~ s/ /-/gr;
open my $output_fh, '>', "$Dir/GeoIP2-City-$suffix.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
}
sub write_no_ipv4_tree_db {
my $subnets = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
ip_version => 6,
database_type => 'MaxMind DB No IPv4 Search Tree',
languages => ['en'],
description => {
en => 'MaxMind DB No IPv4 Search Tree',
},
root_data_type => 'utf8_string',
);
my $subnet = Net::Works::Network->new_from_string( string => '::/64' );
$writer->insert_network( $subnet, $subnet->as_string() );
open my $output_fh, '>', "$Dir/MaxMind-DB-no-ipv4-search-tree.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
# The point of this database is to provide something where we can test looking
# up a single value. In other words, each IP address points to a non-compound
# value, a string rather than a map or array.
sub write_no_map_db {
my $subnets = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 4,
record_size => 24,
database_type => 'MaxMind DB String Value Entries',
languages => ['en'],
description => {
en =>
'MaxMind DB String Value Entries (no maps or arrays as values)',
},
root_data_type => 'utf8_string',
);
for my $subnet (@{$subnets}) {
$writer->insert_network( $subnet, $subnet->as_string() );
}
open my $output_fh, '>', "$Dir/MaxMind-DB-string-value-entries.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
sub write_test_serialization_data {
my $serializer = MaxMind::DB::Writer::Serializer->new(
map_key_type_callback => sub { 'utf8_string' } );
$serializer->store_data( map => { long_key => 'long_value1' } );
$serializer->store_data( map => { long_key => 'long_value2' } );
$serializer->store_data( map => { long_key2 => 'long_value1' } );
$serializer->store_data( map => { long_key2 => 'long_value2' } );
$serializer->store_data( map => { long_key => 'long_value1' } );
$serializer->store_data( map => { long_key2 => 'long_value2' } );
open my $fh, '>', 'maps-with-pointers.raw';
print {$fh} ${ $serializer->buffer() }
or die "Cannot write to maps-with-pointers.raw: $!";
close $fh;
return;
}
main();