lib/Biblio/Database.pm - metacpan.org

# --*-Perl-*--
# $Id: Database.pm 10 2004-11-02 22:14:09Z tandler $
#

package Biblio::Database;
use 5.006;
use strict;
use warnings;
#use English;

# for debug:
use Data::Dumper;

BEGIN {
    use vars qw($Revision $VERSION);
	my $major = 1; q$Revision: 10 $ =~ /: (\d+)/; my ($minor) = ($1); $VERSION = "$major." . ($minor<10 ? '0' : '') . $minor;
}


# This class is a subclass of DBI::db, exported by the DBI module.
use DBI 1.0;
use Biblio::BP;

#use vars qw(@ISA);
#@ISA = qw(DBI::db);

use Carp;

# don't print sql communication
my $debug_sql = 0;


sub new {
  my $self = shift;
  my $class = ref($self) || $self;
  my %args = @_;

  #### first check for DSN!

  my $DSN = $ENV{'BIBLIO_DSN'} || $args{'dsn'};
  my $DBMS = $ENV{'BIBLIO_DBMS'} || $args{'dbms'} || 'ODBC';
  my $DBHOST = $ENV{'BIBLIO_HOST'} || $args{'host'} || $args{'dbhost'} ;
  my $DBNAME = $ENV{'BIBLIO_NAME'} || $args{'name'} || $args{'dbname'} || 'biblio';
  my $DBUSER = $ENV{'BIBLIO_USER'} || $args{'user'} || $args{'dbuser'};
  my $DBPASS = $ENV{'BIBLIO_PASS'} || $args{'pass'} || $args{'dbpass'};
  my $DBATTR = $args{'attr'} || $args{'dbattr'};
  $debug_sql = 1 if $args{'debug_sql'};
  $DBMS || $DSN or croak "Missing DBMS or DSN specification, did you give one?\n" .
  		"You can set \$BIBLIO_DBMS or \$BIBLIO_DSN in the environment.\n";

  # If we use Informix, specify host via INFORMIXSERVER env var. At
  # this place, we may need to add special cases for other DBMS;
  # init code that must be done before the first

  # we use ODBC often in case of ADABAS-driver. ADABAS has,
  # like ORACLE, his own user managment inside each Database. So we
  # need user authorisation for ODBC ...

  if ( $DBMS eq 'Informix' ) {
	if ( ! $ENV{INFORMIXSERVER} ) {
	  chomp( $ENV{INFORMIXSERVER} = $DBHOST || `hostname` );
	}
	if ( ! $ENV{INFORMIXDIR} ) {
	  if ( -d '/opt/informix' ) {
		$ENV{INFORMIXDIR} = '/opt/informix';
	  } elsif ( -d '/usr/informix' ) {
		$ENV{INFORMIXDIR} = '/usr/informix';
	  } else {
		croak "Cannot locate Informix directory, please set INFORMIXDIR";
	  }
	}
	if ( $DBNAME !~ /\@/ ) {
	  $DBNAME .= "\@$DBHOST";
	}
  } elsif ( $DBMS eq 'Solid' ) {
	unless ( $ENV{DBI_USER} ) {
	    $ENV{DBI_USER} = $ENV{DBI_PASS} = 'solid';
    }
  } elsif ( $DBMS eq 'mysql' ) {
	unless( $DSN ) {
		$DSN = "dbi:mysql:database=$DBNAME";
		$DSN .= ";host=$DBHOST" if $DBHOST;
	}
  }

  # Connect to database.
  # FIXME: Should set RaiseError to true here.
  $DSN = "dbi:$DBMS:$DBNAME" unless( $DSN );
  $DBATTR = { AutoCommit => 0,
			PrintError => 0,
			RaiseError => 0,
			} unless $DBATTR;
  my $db = DBI->connect($DSN, $DBUSER, $DBPASS, $DBATTR)
      or croak "$DBI::errstr\nCannot connect to $DBMS database $DBNAME ($DSN)";

  # FIXME: Doesn't work as expected. Fields with several blanks
  # should be reduced to the empty string, while they have one blank
  # now. (Or similar, at least that's my impression, not fully
  # checked.) Therefore we set it to 0 and chop the blanks ourselves
  # below.
  $db->{ChopBlanks} = 0;

#    $db->do ('set transaction isolation level read committed;')
#	or croak "$DBI::errstr\nCannot set isolation level";
  $self = {
	'db' => $db,
	%args,
	};
  return bless $self, $class;
}

sub DESTROY ($) {
  my $self = shift;
  $self->disconnect();
}



#
#
# DBI methods
#
#

sub db { return shift->{'db'}; }
sub disconnect { my $self = shift;
  my $db = $self->db();
  $db->disconnect(@_) if($db);
  $self->{'db'} = undef;
}
sub do { return shift->db()->do(@_); }
sub commit { return shift->db()->commit(@_); }

sub prepare {
  my $self = shift;
  my ($stmt, $attr) = @_;

  # Sanitize the query a bit: Chop trailing semicolon, chop comments
  $stmt =~ s/;\s*$//;
  $stmt =~ s/^--.*$/\t/mg;

  return $self->db()->prepare($stmt, $attr);
}

# $table = $db->query(SQL_STMT, \%ATTR, PARAM, ...);
# foreach $row ( @$table ) { <access @$row> }
#
# Make a query and gather complete result. Use it if it's of no
# advantage to output the result row-wise during fetch, it'll save you
# to handle statement handlers, etc.
sub query {
  my $self = shift;
  my ($sql_stmt, $attr, @params) = @_;
  # my $driver = $self->{Driver};
  my ($maxrows, @result) = (undef);	# @result is the empty list

  # Check if maxrows attribute is set.
  if ( defined($attr) && defined(%$attr) && exists($attr->{maxrows}) ) {
	$maxrows = $attr->{maxrows};
	delete($attr->{maxrows});
  }

  my $sth = $self->prepare($sql_stmt, $attr)
      or croak "$DBI::errstr\nCannot prepare SQL stmt \n$sql_stmt";

  $sth->execute(@params)
      or croak "$DBI::errstr\nCannot execute SQL statement";

  # NOTE: We cannot use DBI handle method fetchall_arrayref(), as it
  # won't chop blanks correctly and it won't look at the maxrows
  # attribute. Maybe, at some time, DBI will support our demands
  # directly.
  my @row;
  my $row_count = 0;
  while ( @row = $sth->fetchrow_array() ) {

	# We need to strip SPC bytes at the end.
	#
	# FIXME: Check if that's needed for _all_ database drivers.
	# Check in particular selection of empty columns and columns
	# with only one space. In both cases we should get the empty
	# string!
	foreach ( @row ) {
	    s/\s*$//  if ( defined($_) );
	}

	# We have to copy to be able to keep the reference to the row.
	# Otherwise we would keep $n$ references to the same row...
	# Note: The information that each fetchrow() returns the same
	# row (with different values) is from DBD::Informix, version
	# 0.24. As 0.25 is a complete rewrite, it might be different
	# here. Nevertheless we'll copy for a while.

        # FIXME: Check if it's still the same row. How about
        # $db->fetch? Does this return the same row?

	my @copy_row = @row;
	push (@result, \@copy_row);
#	print STDERR "DEBUG ==> row [$result[$#result]]: @row";

	last  if ( $maxrows  &&  ++$row_count >= $maxrows );
    }

  $sth->finish()
      or carp "$DBI::errstr\nProblems releasing SQL statement";
  return \@result;
}


#
#
# methods
#
#

sub getCiteKeys {
# return all paper IDs
  my $self = shift;
  my @paperIDs;
  my $row;
###  foreach $row (@{$self->queryPapers(undef, undef, ['CiteKey'])})
###    { push @paperIDs, $row->{'CiteKey'}; }
  @paperIDs = keys %{$self->queryPapers(undef, undef, ['CiteKey'])};
# print "@paperIDs\n";
  return @paperIDs;
}


sub papers {
# return all papers as defined in DB
  my $self = shift;
  my $papers = $self->{'biblioPapers'};
  if( not defined($papers) ) {
    $papers = $self->queryPapers();
    $self->{'biblioPapers'} = $papers;
  }
  return $papers;
}

sub allPaperFields {
	my $self = shift;
	return [ keys(%{$self->{'column-types'}}) ];
  #  return [
	#  "CiteKey", "CiteType", "PBibNote",
	#  "Identifier", "Location", "Authors",
	#  "SuperTitle", "Chapter",
	#  "Edition", "Editors",
	#  "Howpublished", "Institution", "Journal",
	#  "Month", "Keywords", "Number",
	#  "Organization", "Pages", "Publisher",
	#  "School", "Series", "Title",
	#  "ReportType", "Volume", "Year", "Source", "ISBN",
	#  "Category", "CrossRef", "File", "Recommendation",
	#  ];
}

sub biblio_table {
	my ($self) = @_;
	return $self->{'biblio_table'} || 'biblio';
}

my @_citeTypes = qw(
	article
	book
	booklet
	inproceedings
	inbook
	incollection
	inproceedings
	journal
	manual
	masterthesis
	misc
	phdthesis
	proceedings
	report
	unpublished
	email
	web
	video
	talk
	poster
	thesis
	patent
	);
my %_types = (
	'conference' => 6,
	'techreport' => 13,
	);
for( my $i = 0; $i < scalar(@_citeTypes); $i++ ) {
  $_types{$_citeTypes[$i]} = $i;
}
#  print Dumper \%_types;

sub citeTypes {
  return [@_citeTypes];
}
sub CiteTypeForType {
  my ($self, $Type) = @_;
  return defined($Type) ? $_citeTypes[$Type] : undef;
}
sub TypeForCiteType {
  my ($self, $CiteType) = @_;
  return $_types{$CiteType};
}

sub queryPapers {
# query papers, look in $queryFields for $pattern
  my $self = shift;
  my ($pattern, $queryFields, $resultFields, $ignoreCase) = @_;
  $ignoreCase = 1 if not defined($ignoreCase);
  $pattern = lc($pattern) if($ignoreCase);
  $resultFields = $self->allPaperFields() unless defined($resultFields);
  my $table = $self->biblio_table();
  my $sql = 'SELECT ' . join(', ', map($self->quoteField($_), @{$resultFields})) .
	" FROM $table" .
	($queryFields && $pattern ?
	  ' WHERE ' .
	  join(' OR ',
	   map('(' . $self->quoteField($_, $ignoreCase) . 
		   " LIKE " .
			$self->quoteValue($_, $pattern) .
			')', @{$queryFields}))
	: ''
	);
  print STDERR "$sql\n" if( $debug_sql );
  my $papers = $self->query($sql) or
    die "$DBI::errstr\nSelect failed for $sql\n";
  return $self->papersArrayToHash($resultFields, $papers);
}

sub queryPaperWithId ($$) {
  my ($self, $id) = @_;
  my $resultFields = $self->allPaperFields();
  my $table = $self->biblio_table();
  my $sql = 'SELECT ' . join(', ', map($self->quoteField($_, 0), @{$resultFields})) .
	" FROM $table WHERE " . 
		$self->quoteField("CiteKey") . ' = ' . 
		$self->quoteValue("CiteKey", $id);
  print "$sql\n" if( $debug_sql );
  my $papers = $self->query($sql) or
    croak "$DBI::errstr\nSelect failed for $sql\n";
#  return undef unless $papers->[0];
  my $result = $self->papersArrayToHash($resultFields, $papers);
  return $result->{$id};
}

#  my %fieldMapping = qw(
	#  CiteKey	Custom4
	#  Category	Custom1
	#  Recommendation	Custom5
	#  CrossRef	Custom2
	#  File	Custom3
	#  Keywords	Note
	#  PBibNote	Annote
	#  Organization	Organizat
	#  Institution	Institutn
	#  Authors	Author
	#  Editors	Editor
	#  ReportType	RepType
	#  SuperTitle	Booktitle
	#  Source	URL
	#  Location	Address
	#  Howpublished	Howpublish
	#  );
sub quoteField($$;$) {
	my ($self, $field, $ignoreCase) = @_;
	my $mapping = $self->{'column-mapping'} || {};
	$field = $mapping->{$field} if exists($mapping->{$field});
	$field = "\"$field\"" if $self->{'quote-column-name'};
	$field = "lower($field)" if $ignoreCase 
			&& $self->{'supports-lower'};
	return $field;
}

sub quoteValue ($$$) {
	my ($self, $field, $value) = @_;
	my $column_types = $self->{'column-types'} || {};
	my $type = $column_types->{$field};
	if( $type =~ /INT/i ) { return $value; }
#  print "$type\n";
	# grab the length from the type, strip any text.
	$type =~ /(\d+)/;
	my $length = $1 || $self->{'column-max-string-length'} || 254;
	if( length($value) > $length ) {
		print STDERR "WARNING: long string value in $field: ", 
			length($value),
			" (max $type) -> might fail ...\n";
	}
	$value =~ s/\'/\'\'/g;
	return "'$value'";
}

sub papersArrayToHash ($$$) {
# map all paper arrays to paper hashs
  my $self = shift;
  my ($resultFields, $papers) = @_;
  my @results = map($self->paperArrayToHash($resultFields, $_), @{$papers});
  my %papers = map( ($_->{'CiteKey'} => $_), @results);
  return \%papers;
}

sub paperArrayToHash ($$$) {
# map the given paper array to a paper hash
	my $self = shift;
	my ($resultFields, $paper) = @_;
#print $paper->[0], " ";
#  print STDERR '.';
	my $r = {}; my $id; my $v; my $i = 0;
	foreach $id (@{$resultFields}) {
		$v = $self->replaceShortcuts($paper->[$i++]);
		$r->{$id} = $v if defined($v) && $v ne '';
	}
	# now check for some important fields
	$r->{'CiteKey'} = '<<no CiteKey found>>' 
		unless defined($r->{'CiteKey'});
	# convert CiteType from numeric to text format
	my $CiteType = $r->{'CiteType'};
	if( defined($CiteType) && 
			$self->{'column-types'}->{'CiteType'} =~ /INT/i &&
			$CiteType =~ /^\d+$/ ) {
		$CiteType = $self->CiteTypeForType($CiteType);
		$r->{'CiteType'} = $CiteType if defined $CiteType;
	}
	# my database (StarOffice) has not all fields I need.
	# therefore I use the PBibNote field to generate the
	# contents of several others
	if( defined($r->{'PBibNote'}) ) {
		my @fields = split(/\r?\n/, $r->{'PBibNote'});
		#my $dump =0;
		my @notes;
		foreach my $f (@fields) {
			if( $f =~ /^([a-z]+)\s*=\s*(.*)\s*$/i ) {
				$r->{$1} = $2; #$dump = 1;
			} else {
				push @notes, $f;
			}
		}
		if( scalar @notes ) {
			$r->{'PBibNote'} = join("\n", @notes);
		} else {
			delete $r->{'PBibNote'};
		}
		#  print Dumper $r if $r->{'CiteKey'} eq 'iRoom-PointRight';
	}
	return $r;
}


#
#
# add & update papers
#
#

my %aliasFields = qw/
	DOI			Source
	/;

sub storePaper {
  my ($self, $ref, $update) = @_;
  my $id = $ref->{'CiteKey'};
  my $old_ref = $self->queryPaperWithId($id);

  # prepare the fields ...
  my %refFields = %$ref;

  unless( defined($old_ref) ) {
    # check for some standard fields that must be present!
	print "no CiteKey" unless defined($ref->{'CiteKey'});
	print "no Category" unless defined($ref->{'Category'});
	if( ! defined($refFields{'Identifier'}) ) {
	  #  my $key = Biblio::BP::Util::genkey(%$ref);
	  #  $key = Biblio::BP::Util::regkey($key);
	  my $key = $ref->{'CiteKey'};
	  print STDERR "Generate new Identifier: $key\n";
	  $refFields{'Identifier'} = $key;
	}
  }

	# ... copy some fields (as defaults for others)
	foreach my $f (keys %aliasFields) {
	  if( defined $refFields{$f} &&
			! defined $refFields{$aliasFields{$f}} ) {
	    $refFields{$aliasFields{$f}} = $refFields{$f};
	  }
	}
	
	# ... set the BibDate
	my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
		localtime();
	$refFields{'BibDate'} = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
		$year + 1900, $mon, $mday, $hour, $min, $sec);
	#  use POSIX qw(strftime);
	#  $now_string = strftime("%a %b %e %H:%M:%S %Y", localtime());

  # ... convert CiteType from text to numeric format
  if( defined($refFields{'CiteType'}) &&
	    $self->{'column-types'}->{'CiteType'} =~ /INT/i ) {
    my $type = $self->TypeForCiteType($refFields{'CiteType'});
	#print "type = $type\n";
	if( defined $type ) {
	  $refFields{'CiteType'} = $type;
	}
  }

  my %bibFields;
  foreach my $f (@{$self->allPaperFields()}) {
    if( exists($refFields{$f}) ) {
	  $bibFields{$f} = $self->quoteValue($f, $refFields{$f});
	  delete $refFields{$f};
	}
  }
  # add all remaining fields to PBibNote field
  if( %refFields ) {
#    print STDERR "remaining fields for $id:\n";
##	print Dumper \%refFields;
    #### ...
	my $note = join("\n", map( "$_ = $refFields{$_}", keys %refFields));
#	print STDERR "$note\n";
	$note = "$bibFields{'PBibNote'}\n\n$note\n" if defined $bibFields{'PBibNote'};
	$bibFields{'PBibNote'} = $self->quoteValue('PBibNote', $note);
  }
#print Dumper \%bibFields;

  my $biblio = $self->biblio_table();
  my $sql;
  if( defined($old_ref) ) {
    # this ref was already in the DB => update it
	my $assignments = join(', ',
	  map($self->quoteField($_, 0) . " = $bibFields{$_}", keys %bibFields));
	$sql = "UPDATE $biblio SET $assignments WHERE " .
  	  $self->quoteField("CiteKey") . " = '$id'"
  } else {
    # this is a new ref => insert it into the DB
	my ($fields, $values) =
	  (join(', ', map($self->quoteField($_, 0), keys %bibFields)),
	   join(', ', values %bibFields));
    $sql = "INSERT INTO $biblio ($fields) VALUES ($values)";
  }
  print "$sql\n" if( $debug_sql );
  $self->do($sql) or
    croak "\nDB access failed:\n\n$DBI::errstr\n\n$sql\n";
}



#
#
# shortcuts
#
#

sub replaceShortcuts {
# look in $text and replace all shortcuts
  my ($self, $text) = @_;
  return undef unless defined($text);
  # check, if there is any {} field at all -> this is *much* faster!
  return $text unless $text =~ /\{/;
  my $shortcuts = $self->shortcuts();
  my $pattern = join("|", map( /:$/ ? "$_.*" : $_, (keys(%{$shortcuts}))));
#print $pattern;
  $text =~ s/\{($pattern)\}/ $self->expandShortcut($shortcuts, $1) /ge;
  return $text;
}
sub expandShortcut {
  my ($self, $shortcuts, $text) = @_;
  my @pars = split(/:/, $text);
  my $k = shift @pars; if( @pars ) { $k = "$k:"; }
  my $v = $shortcuts->{$k};
  $v =~ s/%(\d)/ $pars[$1-1] /ge;
#print "\n\n$k ---- $v\n\n";
  return $v;
}

sub shortcuts_table {
	my ($self) = @_;
	return $self->{'shortcuts_table'};
}
sub shortcuts {
	my ($self) = @_;
	return $self->{'shortcuts'} if defined($self->{'shortcuts'});
	my $shortcuts_table = $self->shortcuts_table();
	return $self->{'shortcuts'} = {} unless $shortcuts_table;
	my $sql = "SELECT * FROM $shortcuts_table";
	print "$sql\n" if( $debug_sql );
	my $result = $self->query($sql);
	unless( $result ) {
		print STDERR "$DBI::errstr\nSelect failed for $sql\n";
		return {};
	}
	#use Data::Dumper;
	#  print Dumper $self;
	my %scs = map(($_->[0] => $_->[1]), @{$result});
	return $self->{'shortcuts'} = \%scs;
}

sub updateShortcuts {
  my ($self) = @_;
  delete $self->{'shortcuts'};
}

1;

#
# $Log: Database.pm,v $
# Revision 1.17  2003/06/16 09:07:41  tandler
# - support for 'mysql' dbms
# - DB design can now be configured (columns in DB)
# - field to DB column mapping can now be configured
# - the CiteType column in DB can be a text or numeric format,
#   numeric format is automatically converted
# - field "BibDate" is always updated in storePaper() call
#
# Revision 1.16  2003/05/22 11:49:53  tandler
# some fixes. Custom2 is now CrossRef
#
# Revision 1.15  2003/05/19 13:03:35  tandler
# disable AutoComit, we commit explicitely.
#
# Revision 1.14  2003/04/16 15:03:23  tandler
# suppress empty fields (needed for DBD::CSV)
# the name of the shortcuts table can now be configured (no shortcuts are used, if set to empty of undef).
#
# Revision 1.13  2003/04/15 17:35:58  tandler
# some support for DBD::CSV
#  - more flexibility to configure dbname, quoting of fields etc.
# --> still need improvement ...
#
# Revision 1.12  2003/04/15 13:48:34  tandler
# fixed prototypes
#
# Revision 1.11  2003/04/14 09:43:41  ptandler
# field mapping updated
#
# Revision 1.10  2003/01/27 21:10:20  ptandler
# use CiteKey as StarOffice's "Identifier" field
#
# Revision 1.9  2003/01/14 11:06:52  ptandler
# new config
#
# Revision 1.8  2002/11/05 18:27:52  peter
# PBibNote handling fix
#
# Revision 1.7  2002/11/03 22:12:06  peter
# PBibNote handling
#
# Revision 1.6  2002/09/11 10:43:44  peter
# BP::readpbib
#
# Revision 1.5  2002/08/22 10:38:04  peter
# - Field name fix
# - citeType fix (report instead of techreport, techreport is now an alias)
# - use alias fields for import of records
#
# Revision 1.4  2002/06/06 07:26:59  Diss
# renamed PaperID -> CiteKey (new canonical fields due to bp)
#
# Revision 1.3  2002/06/03 11:38:31  Diss
# support to add/update refs in biblio DB
#
# Revision 1.2  2002/04/03 10:10:16  Diss
# include some stuff from NPC's Database.pm
#
# Revision 1.1  2002/03/27 10:00:50  Diss
# new module structure, not yet included in LitRefs/LitUI (R2)
#
# Revision 1.6  2002/03/18 11:15:47  Diss
# major additions: replace [] refs, generate bibliography using [{}], ...
#
# Revision 1.5  2002/03/07 12:01:51  Diss
# per default return all fields in queries
#
# Revision 1.4  2002/02/25 12:20:08  Diss
# Biblio can now ignore case in queries (using the SQL lower function),
# LitUI queries are now case-insensitive, and system() is used to open files.
#
# Revision 1.3  2002/02/11 11:57:06  Diss
# lit UI with search dialog, script to start/stop biblio, and more ...
#
# Revision 1.2  2002/01/26 18:21:54  ptandler
# - disconnect from Biblio-DB in LitRef's destructor (DESTROY)
#   -> this allows to re-read the entries without re-connecting
# - moved Word-Doc support from LitUI to LitRef
#
# Revision 1.1  2002/01/14 08:30:26  ptandler
# new module "Biblio.pm" to access biblio database via DBI/ODBC
# LitRefs can get all defined paperIDs now from BIBLIO (using Biblio.pm)
#
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)