The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package MARC::Charset::Table;

=head1 NAME 

MARC::Charset::Table - character mapping db

=head1 SYNOPSIS

    use MARC::Charset::Table;
    use MARC::Charset::Constants qw(:all);

    # create the table object
    my $table = MARC::Charset::Table->new();
   
    # get a code using the marc8 character set code and the character
    my $code = $table->lookup_by_marc8(CYRILLIC_BASIC, 'K');

    # get a code using the utf8 value
    $code = $table->lookup_by_utf8(chr(0x043A));

=head1 DESCRIPTION

MARC::Charset::Table is a wrapper around the character mapping database, 
which is implemented as a tied hash on disk. This database gets generated 
by Makefile.PL on installation of MARC::Charset using 
MARC::Charset::Compiler.

The database is essentially a key/value mapping where a key is a 
MARC-8 character set code + a MARC-8 character, or an integer representing the
UCS code point. These keys map to a serialized MARC::Charset::Code object.

=cut

use strict;
use warnings;
use POSIX;
use GDBM_File;
use MARC::Charset::Code;
use MARC::Charset::Constants qw(:all);
use Storable qw(nfreeze thaw);

=head2 new()

The consturctor.

=cut

sub new
{
    my $class = shift;
    my $self = bless {}, ref($class) || $class;
    $self->_init(&GDBM_READER);
    return $self;
}


=head2 add_code()

Add a MARC::Charset::Code to the table.

=cut


sub add_code
{
    my ($self, $code) = @_;

    # the Code object is serialized
    my $frozen = nfreeze($code);

    # to support lookup by marc8 and utf8 values we 
    # stash away the rule in the db using two keys
    my $marc8_key = $code->marc8_hash_code();
    my $utf8_key = $code->utf8_hash_code();

    # stash away the marc8 lookup key
    $self->{db}->{$marc8_key} = $frozen;

    # stash away the utf8 lookup key (only if it's not already there!)
    # this means that the sets that appear in the xml file will have
    # precedence ascii/ansel
    $self->{db}->{$utf8_key} = $frozen unless exists $self->{db}->{$utf8_key};
}


=head2 get_code()

Retrieve a code using a hash key.

=cut

sub get_code
{
    my ($self, $key) = @_;
    my $db = $self->db();
    my $frozen = $db->{$key};
    return thaw($frozen) if $frozen;
    return undef;
}


=head2 lookup_by_marc8()

Looks up MARC::Charset::Code entry using a character set code and a MARC-8 
value.

    use MARC::Charset::Constants qw(HEBREW);
    $code = $table->lookup_by_marc8(HEBREW, chr(0x60));

=cut

sub lookup_by_marc8
{
    my ($self, $charset, $marc8) = @_;
    $charset = BASIC_LATIN if $charset eq ASCII_DEFAULT;
    return $self->get_code(sprintf('%s:%s', $charset, $marc8));
}


=head2 lookup_by_utf8()

Looks up a MARC::Charset::Code object using a utf8 value.

=cut

sub lookup_by_utf8
{
    my ($self, $value) = @_;
    return $self->get_code(ord($value));
}




=head2 db()

Returns a reference to a tied character database. MARC::Charset::Table
wraps access to the db, but you can get at it if you want.

=cut

sub db 
{
    return shift->{db};
}


=head2 db_path()

Returns the path to the character encoding database. Can be called 
statically too: 

    print MARC::Charset::Table->db_path();

=cut

sub db_path
{
    my $path = $INC{'MARC/Charset/Table.pm'};
    $path =~ s/\.pm$//;
    return $path;
}


=head2 brand_new()

An alternate constructor which removes the existing database and starts
afresh. Be careful with this one, it's really only used on MARC::Charset
installation.

=cut

sub brand_new 
{
    my $class = shift;
    my $self = bless {}, ref($class) || $class;
    $self->_init(&GDBM_WRCREAT);
    return $self;
}


# helper function for initializing table internals

sub _init 
{
    my ($self,$opts) = @_;
    tie my %db, 'GDBM_File', db_path(), $opts, 0644;
    $self->{db} = \%db;
}





1;