The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package MaxMind::DB::Reader::Decoder;
$MaxMind::DB::Reader::Decoder::VERSION = '0.050005';
use strict;
use warnings;
use namespace::autoclean;
use autodie;

use Carp qw( confess );
use Data::IEEE754 qw( unpack_double_be unpack_float_be );
use Encode ();
use MaxMind::DB::Common 0.031000 qw( %TypeNumToName );
use MaxMind::DB::Reader::Data::Container;
use MaxMind::DB::Reader::Data::EndMarker;
use Math::Int128 qw( uint128 );
use MaxMind::DB::Types qw( Int );

use Moo;
use MooX::StrictConstructor;

with 'MaxMind::DB::Role::Debugs', 'MaxMind::DB::Reader::Role::Sysreader';

# Compile-time debug switch, taken from the MAXMIND_DB_DECODER_DEBUG
# environment variable. The debug calls in this file are all guarded with
# "if DEBUG".
use constant DEBUG => $ENV{MAXMIND_DB_DECODER_DEBUG};

# This is a constant so that outside of testing any references to it can be
# optimised away by the compiler.
use constant POINTER_TEST_HACK => $ENV{MAXMIND_DB_POINTER_TEST_HACK};

# Put a UTF-8 layer on STDERR when debugging is enabled (presumably where the
# MaxMind::DB::Role::Debugs helpers write their output - confirm).
binmode STDERR, ':utf8'
    if DEBUG;

# Integer added to every pointer value decoded by _decode_pointer. It is set
# through the "pointer_base" constructor argument and is 0 when not given.
has _pointer_base => (
    init_arg => 'pointer_base',
    is       => 'ro',
    isa      => Int,
    default  => 0,
);

# Decodes the data field whose control byte is at $offset in the data source.
#
# Arguments:
#   $offset - position of the field's control byte (required).
#
# Returns the decoded value in scalar context. In list context returns
# ( $value, $next_offset ), where $next_offset is the position just past the
# field. For a pointer, $next_offset is the position just past the pointer
# itself, not past the data it points to.
#
# Dies (via confess) when no offset is given, when the offset is at or beyond
# the end of the data source, or when the control byte resolves to a type
# number that has no entry in %TypeNumToName.
sub decode {
    my $self   = shift;
    my $offset = shift;

    my $bad_data
        = q{The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)};

    confess 'You must provide an offset to decode from when calling ->decode'
        unless defined $offset;

    confess $bad_data
        if $offset >= $self->_data_source_size;

    if (DEBUG) {
        $self->_debug_newline();
        $self->_debug_string( 'Offset', $offset );
    }

    my $ctrl_byte;
    $self->_read( \$ctrl_byte, $offset, 1 );
    $offset++;

    $self->_debug_binary( 'Control byte', $ctrl_byte )
        if DEBUG;

    $ctrl_byte = unpack( C => $ctrl_byte );

    # The type is encoded in the first 3 bits of the byte.
    my $type = $TypeNumToName{ $ctrl_byte >> 5 };

    # Without this guard an unmapped type number would only surface later as
    # uninitialized-value warnings and a failed "_decode_" method lookup.
    confess $bad_data
        unless defined $type;

    $self->_debug_string( 'Type', $type )
        if DEBUG;

    # Pointers are a special case, we don't read the next $size bytes, we use
    # the size to determine the length of the pointer and then follow it.
    if ( $type eq 'pointer' ) {
        my ( $pointer, $new_offset )
            = $self->_decode_pointer( $ctrl_byte, $offset );

        # In pointer-test mode the raw pointer is returned instead of the
        # value it refers to.
        return $pointer if POINTER_TEST_HACK;

        my $value = $self->decode($pointer);
        return wantarray
            ? ( $value, $new_offset )
            : $value;
    }

    # An extended type stores its real type number, minus 7, in the byte
    # that follows the control byte.
    if ( $type eq 'extended' ) {
        my $next_byte;
        $self->_read( \$next_byte, $offset, 1 );

        $self->_debug_binary( 'Next byte', $next_byte )
            if DEBUG;

        my $type_num = unpack( C => $next_byte ) + 7;
        confess
            "Something went horribly wrong in the decoder. An extended type resolved to a type number < 8 ($type_num)"
            unless $type_num >= 8;

        $type = $TypeNumToName{$type_num};

        # The extra byte can name a type number that is not in the table.
        confess $bad_data
            unless defined $type;

        $offset++;
    }

    ( my $size, $offset )
        = $self->_size_from_ctrl_byte( $ctrl_byte, $offset );

    $self->_debug_string( 'Size', $size )
        if DEBUG;

    # The map and array types are special cases, since we don't read the next
    # $size bytes. For all other types, we do.
    return $self->_decode_map( $size, $offset )
        if $type eq 'map';

    return $self->_decode_array( $size, $offset )
        if $type eq 'array';

    # A boolean has no payload either; its value is carried by the size.
    return $self->_decode_boolean( $size, $offset )
        if $type eq 'boolean';

    # $buffer stays undefined for zero-length fields.
    my $buffer;
    $self->_read( \$buffer, $offset, $size )
        if $size;

    $self->_debug_binary( 'Buffer', $buffer )
        if DEBUG;

    # Dispatch to the per-type decoder, e.g. _decode_uint32.
    my $method = '_decode_' . $type;
    return wantarray
        ? ( $self->$method( $buffer, $size ), $offset + $size )
        : $self->$method( $buffer, $size );
}

# Amount added to a decoded pointer, keyed by the pointer's size in bytes.
my %pointer_value_offset = (
    1 => 0,
    2 => 2_048,      # 2**11
    3 => 526_336,    # 2**19 + 2**11
    4 => 0,
);

# Reads the pointer that follows a pointer-type control byte.
#
# Arguments are the numeric control byte and the offset of the first byte
# after it. Returns ( $pointer, $offset_after_pointer ).
sub _decode_pointer {
    my ( $self, $ctrl_byte, $offset ) = @_;

    # Bits 3 and 4 of the control byte hold the pointer size minus one.
    my $width = 1 + ( ( $ctrl_byte >> 3 ) & 0b11 );

    $self->_debug_string( 'Pointer size', $width )
        if DEBUG;

    my $raw;
    $self->_read( \$raw, $offset, $width );

    $self->_debug_binary( 'Buffer', $raw )
        if DEBUG;

    # Pointers shorter than four bytes take the low three bits of the control
    # byte as their most significant byte; four-byte pointers use only the
    # bytes that were read.
    my $packed = $raw;
    if ( $width != 4 ) {
        $packed = pack( C => $ctrl_byte & 0b111 ) . $raw;
    }
    $packed = $self->_zero_pad_left( $packed, 4 );

    $self->_debug_binary( 'Packed pointer', $packed )
        if DEBUG;

    my $pointer
        = unpack( 'N' => $packed )
        + $self->_pointer_base()
        + $pointer_value_offset{$width};

    $self->_debug_string( 'Pointer to', $pointer )
        if DEBUG;

    return ( $pointer, $offset + $width );
}

# Decodes $buffer from UTF-8 into a Perl character string. A size of 0 yields
# the empty string; malformed input makes Encode die (FB_CROAK).
sub _decode_utf8_string {
    my ( $self, $buffer, $size ) = @_;

    return $size == 0
        ? q{}
        : Encode::decode( 'utf-8', $buffer, Encode::FB_CROAK );
}

# Decodes a double via unpack_double_be. The field must be exactly 8 bytes
# long; _verify_size dies otherwise.
sub _decode_double {
    my ( $self, $buffer, $size ) = @_;

    $self->_verify_size( 8, $size );

    return unpack_double_be($buffer);
}

# Decodes a float via unpack_float_be. The field must be exactly 4 bytes
# long; _verify_size dies otherwise.
sub _decode_float {
    my ( $self, $buffer, $size ) = @_;

    $self->_verify_size( 4, $size );

    return unpack_float_be($buffer);
}

# Returns the raw bytes unchanged. A zero-length field yields the empty
# string rather than the buffer that was passed in.
sub _decode_bytes {
    my ( $self, $buffer, $size ) = @_;

    return $size == 0 ? q{} : $buffer;
}

# Decodes an unsigned 16-bit integer. The width argument of 4 selects the
# 32-bit unpack path of _decode_uint, which zero-pads shorter buffers.
sub _decode_uint16 {
    my ( $self, $buffer, $size ) = @_;

    return $self->_decode_uint( $buffer, $size, 4 );
}

# Decodes an unsigned 32-bit integer by delegating to _decode_uint with a
# width of 4 bytes.
sub _decode_uint32 {
    my ( $self, $buffer, $size ) = @_;

    return $self->_decode_uint( $buffer, $size, 4 );
}

# Decodes a map of $size key/value pairs whose first key starts at $offset.
#
# Returns a hash reference in scalar context, or ( $hashref, $next_offset )
# in list context, where $next_offset is the position after the last value.
sub _decode_map {
    my ( $self, $size, $offset ) = @_;

    $self->_debug_string( 'Map size', $size )
        if DEBUG;

    my %decoded;
    for my $pair_num ( 1 .. $size ) {
        my ( $key, $val );

        # Each decode call hands back the offset of the next field.
        ( $key, $offset ) = $self->decode($offset);
        ( $val, $offset ) = $self->decode($offset);

        if (DEBUG) {
            $self->_debug_string( "Key $pair_num",   $key );
            $self->_debug_string( "Value $pair_num", $val );
        }

        $decoded{$key} = $val;
    }

    $self->_debug_structure( 'Decoded map', \%decoded )
        if DEBUG;

    return ( \%decoded, $offset ) if wantarray;
    return \%decoded;
}

# Decodes a signed 32-bit big-endian integer.
#
# $buffer holds the raw bytes and $size their count. Fewer than four bytes is
# allowed: the value is left-padded with zero bytes before unpacking. A size
# of 0 decodes to 0.
#
# Dies (via confess) if $size is greater than 4. Such a field cannot be an
# int32, and without this check only its first four bytes would be decoded.
sub _decode_int32 {
    my $self   = shift;
    my $buffer = shift;
    my $size   = shift;

    return 0 if $size == 0;

    confess
        q{The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)}
        if $size > 4;

    # "N!" is the signed variant of the 32-bit big-endian template.
    return unpack( 'N!' => $self->_zero_pad_left( $buffer, 4 ) );
}

# Decodes an unsigned 64-bit integer by delegating to _decode_uint with a
# width of 8 bytes.
sub _decode_uint64 {
    my ( $self, $buffer, $size ) = @_;

    return $self->_decode_uint( $buffer, $size, 8 );
}

# Decodes an unsigned 128-bit integer by delegating to _decode_uint with a
# width of 16 bytes.
sub _decode_uint128 {
    my ( $self, $buffer, $size ) = @_;

    return $self->_decode_uint( $buffer, $size, 16 );
}

# Shared implementation for the unsigned integer decoders.
#
# Arguments:
#   $buffer - raw big-endian bytes of the field (may be undef when $size is 0)
#   $size   - number of bytes in $buffer
#   $bytes  - maximum width of the integer type in bytes
#
# When $bytes is 4 the result is a plain Perl number; for any other width it
# is a Math::Int128 unsigned object built with uint128(). Buffers shorter
# than the width are left-padded with zero bytes, and a size of 0 decodes to
# zero.
#
# Dies (via confess) if $size is greater than $bytes. Without this check the
# padding step would add nothing and only the leading bytes of the oversized
# field would be decoded.
sub _decode_uint {
    my $self   = shift;
    my $buffer = shift;
    my $size   = shift;
    my $bytes  = shift;

    if (DEBUG) {
        $self->_debug_string( 'UINT size',  $size );
        $self->_debug_string( 'UINT bytes', $bytes );
        $self->_debug_binary( 'Buffer', $buffer );
    }

    confess
        q{The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)}
        if $size > $bytes;

    if ( $bytes == 4 ) {
        return 0 if $size == 0;
        return unpack( 'N' => $self->_zero_pad_left( $buffer, $bytes ) );
    }
    else {
        my $int = uint128(0);

        return $int if $size == 0;

        # Pad to 16 bytes and fold the four 32-bit words, most significant
        # first, into one 128-bit value.
        my @unpacked = unpack( 'NNNN', $self->_zero_pad_left( $buffer, 16 ) );
        for my $piece (@unpacked) {
            $int = ( $int << 32 ) | $piece;
        }

        return $int;
    }
}

# Decodes an array of $size elements whose first element starts at $offset.
#
# Returns an array reference in scalar context, or ( $arrayref, $next_offset )
# in list context, where $next_offset is the position after the last element.
sub _decode_array {
    my ( $self, $size, $offset ) = @_;

    $self->_debug_string( 'Array size', $size )
        if DEBUG;

    my @elements;
    for my $element_num ( 1 .. $size ) {
        my $val;

        # decode hands back the offset of the next element.
        ( $val, $offset ) = $self->decode($offset);

        $self->_debug_string( "Value $element_num", $val )
            if DEBUG;

        push @elements, $val;
    }

    $self->_debug_structure( 'Decoded array', \@elements )
        if DEBUG;

    return ( \@elements, $offset ) if wantarray;
    return \@elements;
}

# Returns a new MaxMind::DB::Reader::Data::Container object. Any arguments
# passed in are ignored.
sub _decode_container {
    my $class = 'MaxMind::DB::Reader::Data::Container';

    return $class->new();
}

# Returns a new MaxMind::DB::Reader::Data::EndMarker object. Any arguments
# passed in are ignored.
sub _decode_end_marker {
    my $class = 'MaxMind::DB::Reader::Data::EndMarker';

    return $class->new();
}

# A boolean has no payload: the size taken from the control byte is the
# value. Returns $size in scalar context and ( $size, $offset ) in list
# context, with $offset passed through unchanged.
sub _decode_boolean {
    my ( $self, $size, $offset ) = @_;

    return $size unless wantarray;
    return ( $size, $offset );
}

# Dies (via confess) with the bad-data message unless $actual is numerically
# equal to $expected.
sub _verify_size {
    my ( $self, $expected, $actual ) = @_;

    $expected == $actual
        or confess
        q{The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)};
}

# Works out a field's payload size from its control byte.
#
# The low five bits of the control byte give the size directly when they are
# below 29. The values 29, 30 and 31 mean that 1, 2 or 3 extra bytes
# starting at $offset hold the size, offset by 29, 285 and 65821.
#
# Returns ( $size, $offset_after_any_size_bytes ).
sub _size_from_ctrl_byte {
    my ( $self, $ctrl_byte, $offset ) = @_;

    my $marker = $ctrl_byte & 0b00011111;
    return ( $marker, $offset )
        if $marker < 29;

    my $extra_bytes = $marker - 28;

    my $raw;
    $self->_read( \$raw, $offset, $extra_bytes );

    my $size
        = $marker == 29 ? 29 + unpack( 'C', $raw )
        : $marker == 30 ? 285 + unpack( 'n', $raw )
        :   65821 + unpack( 'N', $self->_zero_pad_left( $raw, 4 ) );

    return ( $size, $offset + $extra_bytes );
}

# Prepends NUL bytes to $content until it is $desired_length bytes long.
sub _zero_pad_left {
    my ( $self, $content, $desired_length ) = @_;

    my $missing = $desired_length - length($content);

    return ( "\x00" x $missing ) . $content;
}

# NOTE(review): make_immutable is the Moose finalisation idiom, but this class
# is built with Moo. Moo's documentation describes make_immutable on its meta
# object as a no-op - confirm before relying on or removing this call.
__PACKAGE__->meta()->make_immutable();

1;