The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package BerkeleyDB::Lite ;

#     Copyright (c) 1997-2001 Jim Schueler. All rights reserved.
#     This program is free software; you can redistribute it and/or
#     modify it under the same terms as Perl itself.
#

# The documentation for this module is at the bottom of this file,
# after the line __END__.

use BerkeleyDB ;
use Storable qw( nfreeze thaw ) ;
use Carp ;

use 5.006;
use strict;
use warnings;

require Exporter;

our $VERSION = '1.1';
## See Changes file

our @defaults = ( 
		rootdir => "/usr/local/apache/cgi-bin/db" 
		) ;

our ( %env, %dbreg ) ;

our @ISA = qw( Exporter );

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration	use BerkeleyDB::Lite::Hash ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.

our %EXPORT_TAGS = ( 'all' => [ qw() ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw( duplicatekeys incrementkeys uniquepairs uniquekeys );

## Problems have occured during global destruction if Env objects are
## closed before database objects.  %dbreg closes all open databases 
## first.
END {
	closeall() ;
	}

## Run closeall if forking child processes.  All references must be destroyed
## first.
sub closeall {
	map { eval { $_->db_close } } values %dbreg ;
	%env = () ;
	}

sub new {
	my $invocator = shift ;
	my $class = ref $invocator || $invocator ;

	my $self = $class->scalars( @_ ) ;
	my $ref = tied %$self ;

	return undef unless $ref ;

	$ref->filter_store_value (
			sub {
				$_ = nfreeze( ref $_? $_: \$_ ) ;
				} ) ;

	$ref->filter_fetch_value (
			sub {
				$_ = thaw $_ ;
				} ) ;

	return bless $self, $class ;
	}

sub uniquekeys {
	return 1 ;
	}

sub duplicatekeys {
	return property => DB_DUP ;
	}

sub uniquepairs {
	return 	property => DB_DUP | DB_DUPSORT ;
	}

sub incrementkeys {
	return compare => sub { ( $_[0] || 0 ) <=> ( $_[1] || 0 ) } ;
	}

sub envsetup {
	my %config = ( @defaults, @_ ) ;
	croak "'home' not defined" unless $config{home} ;
	croak "'filename' not defined" unless $config{filename} ;

	### Environment ###
	my %args_env = () ;
	$args_env{'-Cachesize'} = $config{cachesize}
			if exists $config{cachesize} ;
	if ( exists $config{server} ) {
		$args_env{'-Server'} = $config{server} ;
		$args_env{'-Home'} = $config{home} ;
		}
	else {
		$args_env{'-Home'} = "$config{rootdir}/$config{home}" ;
		}

	my $flags = DB_CREATE | DB_INIT_MPOOL | DB_INIT_CDB ;
	$flags ||= DB_RECOVER if $config{recover} ;

	$env{ $config{home} } ||= new BerkeleyDB::Env
			%args_env,
			-Flags => $flags,
			or warn "$!" ;

	### Database ###
	my %args_db = () ;
	$args_db{filename} = "$config{rootdir}/$config{home}/$config{filename}"
			unless $config{server} ;
	$args_db{'-Env'} = $env{ $config{home} } ;
	$args_db{'-Filename'} = $config{filename} ;
	$args_db{'-Property'} = $config{property} if exists $config{property} ;
	$args_db{'-Compare'} = $config{compare} if exists $config{compare} ;

	return %args_db ;
	}

## intended for duplicatekeys
sub recordset {
	my $ref = shift ;
	my $self = tied %$ref ;
	my $key = shift ;
	my $value = "" ;

	my @values = () ;

	# database locked
	my $cursor = $self->db_cursor ;

	if ( $cursor->c_get( $key, $value, DB_SET ) ) {
		$cursor = undef ;
		return @values ;
		}

	push @values, $value ;
	while ( ! $cursor->c_get( $key, $value, DB_NEXT_DUP ) ) {
		push @values, $value ;
		}

	$cursor = undef ;
	return @values ;
	}

## experimental to improve durability
sub sync {
	my $ref = shift ;
	my $self = tied %$ref ;
	$self->db_sync ;
	}

sub syncall {
	map { $_->db_sync } values %dbreg ;
	}

## intended for duplicatekeys
sub delete {
	my $ref = shift ;
	my $self = tied %$ref ;
	my $key = shift ;
	my $value = shift ;
	my $orig = $value ;

	my $cursor = $self->db_cursor( DB_WRITECURSOR ) ;
	my $status = $cursor->c_get( $key, $value, DB_GET_BOTH ) ;

	## Warning: Ensure consistency between numbers with strings.
	## See Storable documentation.
	$cursor->c_del unless $status ;
	$cursor = undef ;

	return $status ;
	}

sub DESTROY {
	my $ref = shift ;
	my $self = tied %$ref ;
	return unless $self ;

	delete $dbreg{ $self->[0] } ;

	## Sleepycat suggests that as an RPC client, db_close is a noop.
	## In that case, db_sync ought to be called instead.
	## How to check whether db's env uses RPC?
	eval { $self->db_sync } ;

	eval { $self->db_close } ;
	}


package BerkeleyDB::Lite::Hash ;

use BerkeleyDB ;
use Carp ;

our @ISA = qw( BerkeleyDB::Lite ) ;

sub scalars {
	my $invocator = shift ;
	my $class = ref $invocator || $invocator ;
	my( $self, %self ) ;

	my %env = BerkeleyDB::Lite::envsetup( @_ ) ;

	my %alt = @_ ;
	my $filename = $env{filename} ;
	delete $env{filename} ;

	### Table ###
	$self = tie %self, $alt{subclass} || 'BerkeleyDB::Hash',
			%env,
			-Flags => DB_CREATE, 
			or warn "($$) $filename: $!"
			if $env{'-Env'} ;

	## Failsafe- open R/O without locking
	if ( $filename && ! $self ) {
		delete $env{ '-Filename' } ;
		delete $env{ '-Env' } ;
	
		$self = tie %self, 'BerkeleyDB::Hash',
				-Filename => $filename,
				-Flags => DB_RDONLY, 
				%env,
				or warn "$filename: $! (readonly)" ;
		}

	$dbreg{ $self->[0] } = $self if ref $self ;
	return bless \%self, $class ;
	}


package BerkeleyDB::Lite::Btree ;

use BerkeleyDB ;
use Carp ;

our @ISA = qw( BerkeleyDB::Lite ) ;

sub lexical {
	my $invocator = shift ;
	my $class = ref $invocator || $invocator ;

	return BerkeleyDB::Lite::Btree::Lexical->new( @_ ) ;
	}

sub scalars {
	my $invocator = shift ;
	my $class = ref $invocator || $invocator ;
	my( $self, %self ) ;

	my %env = BerkeleyDB::Lite::envsetup( @_ ) ;

	my %alt = @_ ;
	my $filename = $env{filename} ;
	delete $env{filename} ;

	### Table ###
	$self = tie %self, $alt{subclass} || 'BerkeleyDB::Btree',
			%env,
			-Flags => DB_CREATE, 
			or warn "$filename $!"
			if $env{'-Env'} ;

	## Failsafe- open R/O without locking
	if ( $filename && ! $self ) {
		delete $env{ '-Filename' } ;
		delete $env{ '-Env' } ;
	
		$self = tie %self, 'BerkeleyDB::Btree',
				-Filename => $filename,
				-Flags => DB_RDONLY, 
				%env,
				or warn "$filename $!" ;
		}

	$dbreg{ $self->[0] } = $self if ref $self ;
	return bless \%self, $class ;
	}

sub dosearch {
	my $ref = shift ;
	my $self = tied %$ref ;
	my $partkey = shift ;
	my $isunique = shift ;

	my %unique = () ;
	my @keys = () ;
	my @values = () ;
	my @each = () ;

	return [] unless $partkey ;
	my $length = length $partkey ;

	# database locked
	my $cursor = $self->db_cursor ;

	my $value = 0 ;
	my $key = $partkey ;
	my $status = $cursor->c_get( $key, $value, DB_SET_RANGE ) ;
	
	while ( $key ) {
		last if $status || substr( $key, 0, $length ) ne $partkey ;

		if ( $isunique ) {
			$unique{ $key }++ ;
			}
		else {
			push @each, [ $key, $value ] ;
			}

		$status = $cursor->c_get( $key, $value, DB_NEXT ) ;
		}

	
	$cursor = undef ;
	@each = map { [ $_, $unique{$_} ] } keys %unique if $isunique ;
	return \@each ;
	}

sub matchingkeys {
	return map { $_->[0] } @{ dosearch( @_ ) } ;
	}

sub matchingvalues {
	return map { $_->[1] } @{ dosearch( @_ ) } ;
	}

sub searchset {
	return map { @$_ } @{ dosearch( @_ ) } ;
	}

## intended for Btree's with incremented keys
sub nextrecord {
	my $ref = shift ;
	my $self = tied %$ref ;

	my $key = 0 ;
	my $value = 0 ;
	my $cursor = $self->db_cursor() ;
	$cursor->c_get( $key, $value, DB_LAST ) ;

	$ref->{ $key +1 } = {} ;
	$cursor = undef ;
	return $key +1 ;
	}


package BerkeleyDB::Lite::Btree::Lexical ;
	
our @ISA = qw( BerkeleyDB::Lite::Btree ) ;

sub scalars {
	my $invocator = shift ;
	my $class = ref $invocator || $invocator ;

	my $self = BerkeleyDB::Lite::Btree->scalars( @_ ) ;
	my $ref = tied %$self ;

	return undef unless $ref ;

	$ref->filter_store_key (
			sub {
				$_ = sprintf "%010d", $_ ;
				} ) ;

	$ref->filter_fetch_key (
			sub {
				$_ = sprintf "%d", $_ ;
				} ) ;

	return bless $self, $class ;
	}


1;
__END__
# Below is stub documentation for your module. You better edit it!

=head1 NAME

BerkeleyDB::Lite - Simplified Interface to BerkeleyDB

=head1 SYNOPSIS

  use BerkeleyDB::Lite;


=head2 ## Example 1

  ## Create a Hashed database
  my $db = new BerkeleyDB::Lite::Hash
		home => 'zoo',
		filename => 'residents' ;

  $db->{Samson} = new Primate ;
  $db->{Cornelius} = new Primate ;
  $db->{Kaa} = new Reptile ;


=head2 ## Example 2

  ## Create a Btree database allowing duplicates and scalar values
  my $types = scalars BerkeleyDB::Lite::Btree
		home => 'zoo',
		filename => 'types',
		&duplicatekeys ;

  $types->{primate} = 'Samson' ;
  $types->{primate} = 'Cornelius' ;
  $types->{reptile} = 'Kaa' ;

  printf "%s\n", join ' ', $types->recordset{primate} ;
  ## prints: Samson Cornelius

  $types->delete( primate => 'Samson' ) ;
  printf "%s\n", join ' ', $types->recordset{primate} ;
  ## prints: Cornelius


=head2 ## Example 3

  ## Create a database of visitors
  ## Use a table with arbitrary keys
  ## Track visitors by date/timestamp

  $tickets = new BerkeleyDB::Lite::Btree
		home => 'zoo',
		filename => 'tickets',
		&incrementkeys ;

  ## Lexical Alternative
  # $tickets = lexical BerkeleyDB::Lite::Btree
  #		home => 'zoo',
  #		filename => 'tickets' ;

  $bytime = scalars BerkeleyDB::Lite::Btree
		home => 'zoo',
		filename => 'ticketsbytime',
		&duplicatekeys ;

  ## Process a new visitor in real time
  sub newvisitor {
	my $serial = $tickets->nextrecord() ;
        my $date = getdate() ;	## not part of BerkeleyDB::Lite
	my $time = gettime() ;	## not part of BerkeleyDB::Lite

	$tickets->{$serial} = { @_ } ;
	$bytime->{ "$date $time" } = $serial ;
	return $serial ;
	}

  ## Get a list of visitors on a certain date
  sub showvisitorsbydate {
	my $date = shift ;
	return $bytime->matchingvalues( $date ) ;
	}

=head1 DESCRIPTION

BerkeleyDB::Lite is an interface to Paul Marquess's BerkeleyDB
that provides simplified constructors, tied access to data, and 
methods for returning multiple record sets.

=head2 Example 1

BerkeleyDB::Lite maintains BerkeleyDB environment references
in a package variable hash keyed on the B<home> argument.  The 
basic BerkeleyDB::Lite constructor arguments define the 
BerkeleyDB environment and database.  When the constructor 
is called, a previously opened environment is used if 
available.  Otherwise, a new environment is created and is 
available to future constructor requests.

This version of BerkeleyDB::Lite creates all environment objects 
as concurrent data stores.  Transactional data storage is not 
currently integrated.

By default, BerkeleyDB::Lite is designed to marshall objects into a 
database using the B<Storable> module.

Example 1 shows a simple application that illustrates both of 
these features.  The constructor is called with the minimum 
arguments to identify the environment and the database.

These few lines of code are sufficient to add persistent object 
support to an application.


=head2 Example 2

One of Berkeley's most appealing features is support for 
duplicate keys.  This feature enables a programmer to use 
persistent arrays, where elements can be accessed, added, 
and deleted without marshalling.
 
Example 2 uses the B<scalars> constructor which disables the 
automatic serialization of record access.  Otherwise, if the 
B<new> constructor is used, scalars will be returned as scalar 
references, regardless of how they are stored.

B<&duplicatekeys> is a subroutine that returns a pair of 
constants as a shortcut.  The constants are defined in the 
BerkeleyDB module.

The B<recordset> method returns a stored list from the database.  
This method is available to both BerkeleyDB::Lite::Btree and
BerkeleyDB::Lite::Hash classes.

The B<delete> method is used to delete an element from the list.  
Since BerkeleyDB::Lite adheres to the B<Tie> interface, the 
B<delete function> can normally used to remove stored objects.  
The B<delete method> should be used on databases with duplicate 
keys to avoid indeterminate results.

BerkeleyDB returns the status of a delete operation.  This 
feature can be used to delete an entire list using the following 
idiom:

  while ( ! delete $types->{primate} ) {}


A BerkeleyDB database configured for duplicate keys also allows 
duplicate key/value pairs.  For most one-to-many data sets, 
key/value pairs should be unique.  This issue has not been 
completely resolved.  Presently, the workaround is to import a 
retrieved list into a hash structure:

  %unique = map { $_ => 1 } $types->recordset('primate') ;
  keys %unique ;


However, care should be taken when deleting elements.  The 
delete method for duplicate keys should almost always be 
invoked using an idiom similar to the one above:

  while ( ! $types->delete( primate => 'samson' ) ) {}


Another source of problems occurs when using the B<delete> 
method on databases containing objects.  In this case, the 
second argument may refer to an object that does not exactly 
match the stored value.  The following code illustrates this 
difficulty:

  my $cats = new BerkeleyDB::Lite::Btree(
		home => 'zoo',
		filename => 'cats',
		&duplicatekeys,
		) ;

  my $Felix = new BigCat dinner => 'antelope' ;
  $cats->{lion} = $Felix ;
  $Felix->{dinner} = 'gazelle' ;
  $cats->delete( lion => $Felix ) ;		## fails


This problem also occurs because the results of the 
marshalling operation differ depending on whether numbers 
are interpreted as integers, floats, or strings.  Thus an 
object's value may change merely as a result of its 
context.  The following example illustrates the situation:

  $weight = '300 lbs.' ;
  $weight =~ s/\D//g ;
  my $Felix = new BigCat( weight => $weight ) ;	## member as string
  $cats->{lion} = $Felix ;
  $cats->delete( lion => $Felix )		## operation fails
		if $Felix->{weight} > 200 ;	## member as integer 


=head2 Example 3

Example 3 shows a few additional features helpful to 
developers accustomed to relational databases.  These 
features take advantage of the B<Btree> database capabilities, 
and are not available to BerkeleyDB::Lite::Hash objects.

The B<nextrecord> method of BerkeleyDB::Lite::Btree returns 
a new unique key.  Each B<nextrecord> call creates a new 
blank record to avoid race conditions, and returns the new 
key.  This method creates a key by adding 1 to the last 
record.  In order to ensure that the last record contains 
the highest valued key, use the B<&incrementkeys> argument 
to the BerkeleyDB::Lite::Btree constructor.  The 
B<&incrementkeys> function is a shortcut that returns a 
CODE constant that forces numerical Btree sorting.

There is a significant disadvantage to databases created 
using the B<&incrementkeys> argument.  The resulting 
databases are incompatible with SleepyCat utilities such as 
B<db_dump> and B<db_verify>.  As an alternative, 
B<nextrecord> can be called as a method from the
BerkeleyDB::Lite::Btree::Lexical subclass.  This subclass 
functions identically, but the numerical keys are stored 
as zero padded strings.  Therefore, a restriction on 
B<Lexical> subclass databases is that keys must be 
numerically less than 10,000,000,000.

The B<lexical> constructor to the BerkeleyDB::Lite::Btree 
class is synonymous with the B<new> constructor to the 
BerkeleyDB::Lite::Btree::Lexical subclass.

BerkeleyDB::Lite also implements another nice Berkeley
feature: partial string matching.  The methods 
B<matchingkeys>, B<matchingvalues>, and B<searchset> 
all return a set of records whose keys begin with a 
common substring.

For example, if keys are defined with the following 
format: S<"2002 Jul 14 15:30">, the following data can 
be returned:

  ## All records for the year
  @annually = $bytime->matchingkeys('2002 ') ;

  ## All records for the month
  @monthly = $bytime->matchingvalues('2002 Jul ') ;

  ## All records for the day
  %daily = $bytime->searchset('2002 Jul 14 ') ;	

B<matchingkeys> returns an array of the matching records' 
keys.  B<matchingvalues> returns an array of the matching 
records' values.  Unforeseen confusion may result from the 
method name B<matchingvalues>- the returned records have 
matching keys, but the record values are returned.

B<searchset> returns the matching records as key/value pairs 
that can populate an associative array as shown.  However, 
using an associative array is pointless if the 
database contains duplicate keys.  The following code is an 
effective technique for capturing the results of this type 
of search:

    foreach ( $bytime->matchingkeys( '2002 Jul 14', &uniquekeys ) ) {
	$daily{ $_ } = [ $bytime->recordset( $_ ) ] ;
	}

B<&uniquekeys> returns a constant that is used primarily as 
an argument to the B<matchingkeys> method to filter duplicate 
results from the database.  When this argument is passed to 
the B<&searchset> method, the values in the key/value pairs 
indicate a record count.  B<&uniquekeys> cannot be used with 
the B<matchingvalues> method.


=head2 EXPORT

&duplicatekeys
&incrementkeys
&uniquepairs
&uniquekeys


=head1 AUTHOR

Jim Schueler, E<lt>jschueler@tqis.comE<gt>

=head1 SEE ALSO

L<Storable>
L<BerkeleyDB>
F<http://www.sleepycat.com>

=cut