#!/usr/bin/perl -w
# $Id: $
use lib "./lib";
use strict;
use utf8;
use warnings;
use POSIX qw(strftime);
use HTTP::Date;
use SeeAlso::Identifier;
use SeeAlso::Source::BeaconAggregator::Maintenance;
use Getopt::Long;
use Pod::Usage;
our $VERSION = '0.2_88';
##### Compatibility
use SeeAlso::Identifier::GND;
if ( $SeeAlso::Identifier::GND::VERSION eq "0.54" ) {
no strict;
eval <<'XxX' or die "tweak failed: $@";
#!!! Have to tweak internals of SeeAlso::Identifier::GND for successful operation !!!
*SeeAlso::Identifier::GND::canonical = \&SeeAlso::Identifier::GND::normalized;
*SeeAlso::Identifier::GND::hash = \&SeeAlso::Identifier::GND::indexed;
#!!! !!!#
XxX
}
#####
my $verbose = 1;
my $ignore_header_errors = 0;
my $action = "list";
my $dbroot = "./beacondbs";
my ($dsn, $cache_size, @pragmas, @presets, @idfilters, $idfinish);
my ($distinct, $force, $keeposd, $quiet, $dbfile, $itype, $redirs);
# Application constants
our $EXAMPLEGOAL = 100; # don't attempt to find examples with higher hit count
our $RETRYWAIT = 213*60; # w/o #REVISIT hint: never attempt more often (3h33')
our $NOREVISITWAIT = 1344*60; # w/o #REVISIT hint: never revisit sooner (22h22')
our $REVISITFORCE = 4*30*24*3600; # with #REVISIT hint: force revisit after (4m)
our $NOREVISITFORCE = 4*7*24*3600; # w/o #REVISIT hint: force revisit after (4w)
=head1 NAME
sasbactrl - command line interface to SeeAlso::Source::BeaconAggregator and
auxiliary classes
=head1 SYNOPSIS
sasbactrl [Options] [Action [Args ...]]
Actions and Arguments:
init [--[no]redirs]
[--itype type] Set up Database for dsn (dbroot must exist)
load alias [uri [file]]
update [pattern | seqno_or_alias [uri]]
status [seqno/alias/pattern] parts of "list" and "headers"
unload seqno/alias/pattern
purge seqno/alias/pattern
list [seqno/alias/pattern]
headers [seqno/alias/pattern]
header seqno/alias/pattern [field=val ...]
dumposd show OpenSearchDescription
refreshosd refresh (then show) OpenSearchDescription
dumpmeta [cgi_base_url ...] show beacon header
beacon cgi_base_url [uAformatname]
setmeta "field: val" ["field: val" ...]
loadmeta header_template_file load OSD and Beacon meta constants
idststat [seqno/alias/pattern]
idcounts [identifier/pattern]
idlist [identifier/pattern]
incidence [identifier/pattern]
admin [field[=value]] list/get/set admin properties:
DATA_VERSION
IDENTIFIER_CLASS
deflate Apply VACUUM and REINDEX on SqLite Db
version gives versions numbers of involved modules
noop do nothing (e.g. if --pragma's do all the work)
Options:
(--dbfile | --dsn )
--force
--keeposd
--verbose|--quiet
--itype
--dbroot ($dbroot)
--ignore-header-errors [for update and load]
--distinct [for idcounts and idstat]
--whitelist RE for [update and load]
--blacklist RE for [update and load]
--preset HEADER=value [for "beacon", repeatable]
--pragma [SQLite pragma] (repeatable)
--cache_size [SQLite pragma value]
=cut
=head1 OPTIONS
=head2 Global options for (almost) all types of actions
=over 8
=item B<--dbfile> I<filename>
Name of database file relative to C<dbroot>
=item B<--dsn> I<basename>
Database file will be the file I<basename-db> in folder I<basename>,
giving SQLite enough headroom for temporary files.
Relative to C<dbroot>.
=item B<--dbroot>
Prefix for --dbfile or --dsn (relative paths only)
=item B<--force>
Use with care.
=item B<--verbose>
Sometimes shows more.
=item B<--quiet>
Sometimes shows less.
=item B<--itype>
Identifier type (corresponding subclass of SeeAlso::Identifier must be installed)
=item B<--redirs=[0|1]>
Prepare additional redirection index
=item --ignore-header-errors
[for update and load]
=item --distinct
[for idcounts and idstat]
=back
=cut
GetOptions (
'verbose+' => \$verbose,
'quiet' => \$quiet,
'distinct' => \$distinct,
'force' => \$force,
'keeposd' => \$keeposd,
'ignore-header-errors' => \$ignore_header_errors,
'redirs!' => \$redirs,
'itype=s' => \$itype,
'dbfile=s' => \$dbfile,
'dbroot=s' => \$dbroot,
'cache_size=s' => \$cache_size,
'pragma=s' => \@pragmas,
'preset=s' => \@presets,
'whitelist=s' => sub { push(@idfilters, "m\x0e$_[1]\x0e && return \@_;"); $idfinish = 'undef'; },
'blacklist=s' => sub { push(@idfilters, "m\x0e$_[1]\x0e && return undef;"); $idfinish = '@_'; },
'dsn=s' => \$dsn,
) or pod2usage({-message => 'Invalid option encountered', -exitval => 2});
if ( @ARGV && $ARGV[0] !~ /^-/ ) {
$action = shift @ARGV;
$action =~ s/\s$//;
}
$verbose = 0 if $quiet;
$dbroot ||= "";
$dbfile ||= "";
$dsn ||= "";
my $filtersub;
if ( @idfilters ) {
$filtersub = eval "sub { local(\$_) = \@_; @idfilters ; return $idfinish;}";
}
local($|) = 1 if $verbose;
my %passopts = (
($distinct ? ( 'distinct' => $distinct) : ()),
($force ? ( 'force' => $force) : ()),
($verbose ? ( 'verbose' => $verbose) : ()),
($ignore_header_errors ? ( 'ignore-header-errors' => $ignore_header_errors) : ()),
($filtersub ? ( 'filter' => $filtersub) : ()),
);
if ( $dsn ) {
die "dbroot directory $dbroot does not exist" unless -d $dbroot;
$dbroot .= "/" unless $dbroot =~ m!/$!;
if ( -d $dbroot && ( ! -d "$dbroot$dsn" ) ) {
if ( $action eq "init" ) {
print "Will create database ${dsn}-db in $dbroot$dsn";
mkdir "$dbroot$dsn" or die "failed creating database dir $dbroot$dsn";
}
else {
die "no database initialised for dsn $dsn in $dbroot tree"};
}
}
$itype ||= "";
my $iclass;
if ( $itype ) {
my $package = "SeeAlso::Identifier::".$itype;
eval {
(my $pkgpath = $package) =~ s=::=/=g; # require needs path...
require "$pkgpath.pm";
import $package;
};
if ( $@ ) {
die "sorry: Identifier Class $package cannot be imported\n$@"};
$iclass = $package->new(); # don't test the outcome (blessed scalar!)
die "sorry: cannot setup instance for Identifier Class $package" unless defined $iclass;
}
else {
# $iclass = SeeAlso::Identifier->new(); # don't set, let it auto-initialize
};
my $db = SeeAlso::Source::BeaconAggregator::Maintenance->new(
'dbroot' => $dbroot,
$dbfile ? ('file' => $dbfile) : (),
$dsn ? ('dsn' => $dsn) : (),
ref($iclass) ? ('identifierClass' => $iclass) : (),
'accept' => {
'VERSION' => qr'0\.[12]|1.0',
'FORMAT' => '^([GP]ND-)?B(?i:EACON)(\s+[vV]?1.0)?$'},
'verbose' => $verbose,
) or die "could not open database $dsn / $dbfile";
$db->{dbh}->do("PRAGMA cache_size = $cache_size;") or die "could not set cache_size to $cache_size" if defined $cache_size;
foreach my $pragma ( @pragmas ) {
$pragma =~ s/^(['"])(.+)\1$/$2/;
$pragma =~ s/\s*;\s*$//;
# print "execute 'PRAGMA $pragma;':\n" if $verbose;
my $prows = $db->{dbh}->selectall_arrayref("PRAGMA $pragma;") or warn "trouble executing 'PRAGMA $pragma;'";
# my $i;
foreach my $rowref ( @$prows ) {
# warn "[".++$i."] - "."@$rowref\n";
print "@$rowref\n";
};
};
=head2 Actions
These are typically an interface to methods of SeeAlso::Source::BeaconAggregator::Maintenance
or of SeeAlso::Source::BeaconAggregator::Publisher
=over 8
=item C<init>
Creates a database with the necessary tables
=cut
if ( $action eq "init" ) {
$db->init(
((defined $redirs) ? ( 'prepareRedirs' => $redirs) : ()),
) or die("could not setup database structure")}
=item C<list>
Interface to the C<listCollections()> method.
List all loaded beacon files with identifier counts.
The optional parameter may specify a SQL-Pattern to restrict
the result.
=cut
elsif ( $action eq "list" ) {
my $seqno_or_alias = shift @ARGV;
while (my ($sq, $alias, $uri, $mtime, $counti, $countu) = $db->listCollections($seqno_or_alias)) {
my $mt = lct($mtime);
$counti ||= 0;
$countu ||= 0;
my $count = ($counti == $countu) ? $countu : "$countu/$counti";
print "$sq\t$alias\t$mt\t($count)\n\t$uri\n";
};
}
=item C<status>
Prints the complete administrative fields and beacon fields and overview
of the revisit policy for the beacon source(s) given by the first argument.
The first argument may be empty or a pattern to produce the information for
more than one source.
=cut
elsif ( $action eq "status" ) {
my $seqno_or_alias = shift @ARGV;
binmode(STDOUT, ":utf8");
printf("Load status for %s (%s)\n", $dsn, lct($^T));
while ( my ($hashref, $metaref) = $db->headers($seqno_or_alias) ) {
last unless defined $hashref;
printf("\n#%u\t%s\t%s\t%s\t(%u/%u i/u)\n", $metaref->{_seqno},
$metaref->{_alias} || "",
$metaref->{_sort} || "",
$metaref->{_mtime} || "???",
$metaref->{_counti} || 0, $metaref->{_countu} || 0);
printf(" < \t%s\n", $hashref->{FEED}) if $hashref->{FEED};
printf(" << \t%s\n", $metaref->{_uri} || "???") unless $hashref->{FEED} && $metaref->{_uri} && ($hashref->{FEED} eq $metaref->{_uri});
printf(" <<< \t%s\n", $metaref->{_ruri} || "???") unless $metaref->{_uri} && $metaref->{_ruri} && ($metaref->{_uri} eq $metaref->{_ruri});
printf(" ! \t%s\n", $metaref->{_admin}) if $metaref->{_admin};
my @dates;
my %stamps;
if ( $stamps{"stime"} = HTTP::Date::str2time($hashref->{TIMESTAMP}, "GMT") || 0 ) {
push(@dates, [2, "TIMESTAMP header", $stamps{"stime"}, ""])}
else {
push(@dates, [0, "[no TIMESTAMP header]", 0, ""])};
if ( $stamps{"rtime"} = HTTP::Date::str2time($hashref->{REVISIT}, "GMT") || 0 ) {
push(@dates, [2, "REVISIT header", $stamps{"rtime"}, ""])}
else {
push(@dates, [8, "[no REVISIT header]", 0, ""])};
$stamps{"mtime"} = HTTP::Date::str2time($metaref->{_mtime}, "GMT") || 0;
push(@dates, [2, "Current copy modified", $stamps{"mtime"}, ""]);
$stamps{"ftime"}= HTTP::Date::str2time($metaref->{_ftime}, "GMT") || 0;
my $fstat = $metaref->{_fstat};
push(@dates, [2, "Current copy loaded", $stamps{"ftime"}, $fstat]);
$stamps{"utime"} = HTTP::Date::str2time($metaref->{_utime}, "GMT") || 0;
my $ustat = $metaref->{_ustat};
push(@dates, [2, "Last update attempt", $stamps{"utime"}, $ustat]);
my($forced, $xtime, $statref) = policy(\%stamps);
my $xstime = $stamps{"mtime"} || $stamps{"stime"};
printf(" - Age: %s %s %s\n", tdist($^T -$xstime));
foreach ( @$statref ) {
my ($stamp, undef, $message) = @$_;
my $sign = ($stamp < $^T) ? "+" : "-";
$sign = "o" if ($sign eq "-") and ($message =~ /^\+/);
printf(" (%s) Reload condition: %s: %s %s\n", $sign, $message, tdist($stamp -$^T));
next if $stamp < $^T; # dont list past events
next if $message =~ /^-/; # suppress REVISIT pleonasm
$message =~ s/ policy.*$//;
$message =~ s/^\W//;
push(@dates, [2, $message, $stamp, ""]);
};
if ( $forced > 0 ) {
push(@dates, [9, "[Reload forced]", 0, ""])}
elsif ( $xtime > $^T ) {
push(@dates, [9, "=> next revisit after", $xtime, ""])}
elsif ( $forced ) {
push(@dates, [9, "=> Ready for reload!", 0, ""])}
else {
push(@dates, [9, "=> Ready for revisit", 0, ""])};
foreach my $lineref ( sort { $a->[0] <=> $b->[0]
|| $a->[2] <=> $b->[2]
|| $a->[1] cmp $b->[1]
} @dates ) {
if ( my $stat = $lineref->[3] ) {
$stat =~ s/replaced/rpl/;
$stat =~ s/deleted/del/;
$stat =~ s/duplicate/dup/;
$stat =~ s/ignored/ign/;
$stat =~ s/invalid/inv/;
printf(" * %-24s %30s\t%s %s %s\n **\t[%s]\n", $lineref->[1], lct($lineref->[2]), ($lineref->[2] ? @{[tdist($lineref->[2] -$^T)]}[0..2] : ("", "", "")), $stat);
}
else {
printf(" * %-24s %30s\t%s %s %s\n", $lineref->[1], lct($lineref->[2]), ($lineref->[2] ? tdist($lineref->[2] -$^T) : ("", "", "")));
};
};
}
}
=item C<headers>
Interface to the C<headers()> method.
Prints all OSD fields and then all beacon fields.
The first argument is a sequence number, alias or pattern.
See C<header> for restricting output to individual fields.
=cut
elsif ( $action eq "headers" ) {
my $seqno_or_alias = shift @ARGV;
binmode(STDOUT, ":utf8");
while ( my ($hashref, $metaref) = $db->headers($seqno_or_alias) ) {
last unless defined $hashref;
print "\n";
while (my ($key, $val) = each %$hashref ) {
next unless defined $val && $val && $val !~ /^\s+$/;
print "#$key: $val\n";
};
while (my ($key, $val) = each %$metaref ) {
####### TEMP CODE _count #########
next if $key eq "_count";
next unless defined $val;
print "$key - $val\n";
};
print "\n";
};
}
=item C<header>
Interface to the C<headerfield()> method.
The first parameter is a mandatory and specifies a sequence number,
alias or pattern (e.g. "C<%>") to operate on.
Further arguments are of the form I<fieldname> or I<fieldname>=C<value>
(value may be empty):
For each of these arguments output a line consisting of the field name, " - "
and the list of all values encountered. Subsequently, if the assignment form of the
argument is given, the corresponding field is set to C<value> for all
sequences met.
Otherwise (no list of field names or assignments is specified), output will be
a list of all I<alias> fields, roughly corresponding to C<headerfield "%" _alias>.
See C<headers> for listing all fields defined for given sequences.
=cut
elsif ( $action eq "header" ) {
my $seqno_or_alias = shift @ARGV or die "Please specify alias or number";
if ( @ARGV ) {
my $did = 0;
while ( scalar @ARGV && (my $field = shift @ARGV) ) {
my($key, $val) = split(/=/, $field, 2);
my($rows, @vals) = $db->headerfield($seqno_or_alias, $key, $val);
print "$key - @vals\n" if @vals && $verbose;
$did ++ if (defined $val) && $rows && (defined $vals[0]) && ($val ne $vals[0]);
};
if ( $did ) {
print "Changed $did fields for $seqno_or_alias\n";
refreshOSD(0) unless $keeposd;
};
}
else { # Simply dump a list of aliases
my($rows, @vals) = $db->headerfield($seqno_or_alias, "_alias");
print "@vals\n"
};
}
=item C<load>
load I<alias>, I<uri>, I<datafile>
Interface to the C<loadFile()> or C<update()> methods
Load beacon data from Uri (if I<uri> is given) or physical file
from disk (I<datafile> must be given).
The beacon file will under the alias I<alias> be ready for
later updates.
=cut
elsif ( $action eq "load" ) {
my $alias = shift @ARGV || "";
my $uri = shift @ARGV || "";
my ($cn, $cc);
if ( my $datafile = shift @ARGV ) {
($cn, $cc) = $db->loadFile($datafile, {_alias => $alias, _uri => $uri}, %passopts)}
elsif ( $uri ) {
($cn, $cc) = $db->update($alias || $uri, {_uri => $uri}, %passopts)}
else {
die ("Please specify an uri and optionally a beacon data file")};
die "Load failed\n" unless $cn;
print "Collection Number $cn ($cc records)\n";
refreshOSD($force) unless $keeposd;
}
=item C<update>
Interface to the C<update()> method
(Re)load beacon files with known URIs according
to the following policy: Respect the #REVISIT hint, assume a #REVISIT
period of 24h for beacon files without. Wait at least 5% of the file's
age (difference between last updateate attempt and modification time)
before trying again. Wait at least 4h anyway. However force reload
after a couple of months.
The C<update> method itself detects illegal headers and if the
beacon file was modified at all and may skip loading the file.
Use the C<--force> option to override the test on modification time
or clear the C<_mtime> field.
When called with no arguments, update B<all> beacon files.
=cut
elsif ( $action eq "update" ) {
my $seqno_or_alias = shift @ARGV;
my $did;
if ( $seqno_or_alias ) {
my $uri = shift @ARGV || "";
if ( $uri ) {
die "no wildcards allowed in seqno_or_alias when uri is provided: $seqno_or_alias" if $seqno_or_alias =~ /%/;
print "\n==> $seqno_or_alias ($uri) <==\n" if $verbose;
my ($cn, $cc) = $db->update($seqno_or_alias, {_uri => $uri}, %passopts);
if ( $cn ) {
$did ++;
print "Collection Number $cn ($cc records)\n" if $cn;
}
}
else {
my $aliasref = $db->RepoCols("_alias", $seqno_or_alias);
foreach ( map {$_->[0]} sort {$a->[1] cmp $b->[1] || $a->[0] <=> $b->[0]} map{[$_, $aliasref->{$_} || ""]} keys %$aliasref ) {
my $item = $aliasref->{$_};
print "\n==> $item (auto) <==\n" if $verbose;
my ($cn, $cc) = $db->update($item, {}, %passopts);
if ( $cn ) {
printf "%2u: %s successfully updated (new SeqNo %u, %u records)\n", ++$did, $aliasref->{$_}, $cn, $cc}
};
};
}
else {
my $aliasref = $db->RepoCols("_alias");
my $uriref = $db->RepoCols("_uri");
my ($revisitref, $updateref, $fetchedref, $modifiedref, $timestampref);
$revisitref = $db->RepoCols("REVISIT") || 0;
$updateref = $db->RepoCols("_utime") || 0;
$fetchedref = $db->RepoCols("_ftime") || 0;
$modifiedref = $db->RepoCols("_mtime") || 0;
$timestampref = $db->RepoCols("TIMESTAMP") || 0;
foreach ( sort {$a <=> $b} keys %$aliasref ) {
print $verbose ? "\n==> $aliasref->{$_} <==\n" : sprintf("%-10s - ", $aliasref->{$_});
unless ( $uriref->{$_} ) {
print "no uri known -> skipped\n";
next;
};
my %stamps = (
"stime" => $timestampref->{$_},
"mtime" => $modifiedref->{$_},
"ftime" => $fetchedref->{$_},
"utime" => $updateref->{$_},
"rtime" => $revisitref->{$_},
);
my $xstime = $stamps{"mtime"} || $stamps{"stime"}; # prefer "last modified" in favor of #TIMESTAMP
my $age = join("", (tdist($^T -$xstime))[0,1]);
my @states;
push(@states, sprintf("[%-26s %30s]\n", "Current Revisit hint", lct($stamps{"rtime"}))) if $stamps{"rtime"};
push(@states, sprintf("[%-26s %30s]\n", "Last action performed", lct($stamps{"utime"}))) if $stamps{"utime"};
push(@states, sprintf("[%-26s %30s]\n", "Current copy loaded", lct($stamps{"ftime"}))) if $stamps{"ftime"};
push(@states, sprintf("[%-26s %30s]\n", "Current copy modified", lct($stamps{"mtime"}))) if $stamps{"mtime"};
push(@states, sprintf("[%-26s %30s]\n", "Current timestamp header", lct($stamps{"stime"}))) if $stamps{"stime"};
my $forcereload;
unless ( $force ) {
print @states if $verbose;
my ($xtime, $statref);
($forcereload, $xtime, $statref) = policy(\%stamps);
unless ( $forcereload ) {
my($status, $info) = ("" x 2);
foreach ( @$statref ) {
my ($stamp, $code, $message) = @$_;
next if $stamp < $^T;
next if $message =~ /^\+/;
$message =~ s/^\W//;
$message =~ s/ policy//;
($status, $info) = ($code, $message);
if ( $verbose ) {
printf "[%4s] %-20s %30s -> skipped\n", $status, $info, lct($stamp)}
};
if ( $xtime > $^T ) {
unless ( $verbose ) {
printf "[%4s] next %30s (age: %s, %s)\n", $status, lct($xtime), $age, $info};
next;
};
}
};
if ( !$verbose ) {
printf("[%4s] curr %30s (age: %s, %s)\n", "LOAD", lct(($stamps{"stime"} && ($stamps{"stime"} < $stamps{"mtime"})) ? $stamps{"stime"} : $stamps{"mtime"}), $age, "will reload");
}
elsif ( $force ) {
print @states};
my ($cn, $cc) = $db->update($aliasref->{$_}, {}, %passopts, ($forcereload ? ( 'force' => 1) : ()), 'nostat' => 1);
if ( $cn ) {
printf "%2u: %s successfully updated (new SeqNo %u, %u records)\n", ++$did, $aliasref->{$_}, $cn, $cc;
# refreshOSD(0) unless $keeposd; # will be done anyway since $did true now
};
print " ***\n";
};
};
if ( $did or $force ) {
refreshOSD( $force ? 1 : 0)};
}
=item C<purge>
Interface to the <purge()> method.
Clears all beacon data for the seqnos or aliases given by
the first argument: The beacon source with its metadata
remains known to the database.
The argument may be a pattern, but the C<--force>
option is needed if actually more than one beacon file is
to be purged).
OSD fields are rebuild if something was done or C<--force>
is given.
=item C<unload>
Interface to the <unload()> method.
Clears all beacon data B<and> removes the listed headers
for the seqnos or aliases given by the first argument:
Any knowledge of the beacon source is effectively deleted
from the database.
The argument may be a pattern, but the C<--force>
option is needed if actually more than one beacon file is
to be unloaded.
OSD fields are rebuild if something was done or C<--force>
is given.
=cut
elsif ( ($action eq "unload") or ($action eq "purge") ) {
my $seqno_or_alias = shift @ARGV or die "Please specify alias, pattern or SeqNo";
if ( $db->$action($seqno_or_alias, %passopts) ) {
refreshOSD(1) unless $keeposd}
else {
warn "did not do anything\n";
refreshOSD($force) if $force and !$keeposd;
};
}
=item C<idstat>
Interface to the C<idStat()> method.
Counts indentifiers, optionally only from those beacon files with seqno or alias restricted by the first argument.
Recognized options are C<distinct> and C<verbose>.
=cut
elsif ( $action eq "idstat" ) {
print $db->idStat($ARGV[0], %passopts)." identifiers\n";}
=item C<idcounts>
Interface to the C<idCounts()> method.
Counts identifiers, optionally only these identifiers which
match the pattern given by the first argument.
Recognized options are C<distinct> and C<verbose>.
=cut
elsif ( $action eq "idcounts" ) {
while (my (@list) = $db->idCounts($ARGV[0], %passopts)) {
print "@list\n"};
}
=item C<idlist>
Interface to the C<idList()> method.
For each identifier output a complicated list of the corresponding entries.
Optionally only these identifiers which match the pattern given
by the first argument are shown.
=cut
elsif ( $action eq "idlist" ) {
binmode (STDOUT, ":utf8");
while (my (@list) = $db->idList(@ARGV)) {
my $id = shift @list;
print "$id";
local($") = ",";
foreach my $rowref ( @list ) {
pop @$rowref while @$rowref && (! defined $rowref->[$#{$rowref}]);
my @pretty = map { (defined $_) ? $_ : "" } @$rowref;
print "\t@pretty";
}
print "\n";
};
}
=item C<incidence>
Interpretation of the C<idList()> method.
For each identifier output a sorted list of aliases (or seqnos).
=cut
elsif ( $action eq "incidence" ) {
binmode (STDOUT, ":utf8");
my $aliasref = $db->RepoCols("_alias");
while (my (@list) = $db->idList(@ARGV)) {
my $id = shift @list;
my %sources;
foreach my $rowref ( @list ) {
my $seqno = $rowref->[0];
$sources{$aliasref->{$seqno} || "<$seqno>"} ++;
};
my @sorted = sort keys %sources;
local($") = " ";
print "$id\|".(scalar @sorted)."\|@sorted\n";
};
}
=item C<dumpmeta>
Interface to the C<dumpmeta> method.
Lists all header fields from the database.
=item C<beacon>
Interface to the C<beacon()> method.
Produces a beacon file (header fields plus beacon entries)
for the database.
The Base URL for the service to be denoted is mandatory as
first parameter.
=cut
elsif ( ($action eq "dumpmeta") or ($action eq "beacon") ) {
binmode(STDOUT, ":utf8");
my $cgibase = shift @ARGV;
unless ( $cgibase ) {
if ( $action eq "beacon" ) {
die "ERROR: You MUST provide a cgi base address\n"}
else {
warn "WARNING: You will have to provide a cgi base address\n"}
};
require SeeAlso::Source::BeaconAggregator::Publisher or die "could not require Publisher extension";
SeeAlso::Source::BeaconAggregator::Publisher->activate(); # "recast" all objects
my $preset = {'FORMAT' => 'BEACON', 'VERSION' => '0.1'};
foreach my $pset ( @presets ) {
$pset =~ s/^(['"])([^=]+)\1\s*=/$2=/;
$pset =~ s/=\s*(['"])([^=]+)\1$/=$2/;
$pset =~ s/^(['"])(.+)\1$/$2/;
if ( $pset =~ /^([A-Z]+)\s*=\s*(.*)$/ ) {
$preset->{$1} = $2}
else {
warn "ignoring --preset $pset\n"};
}
my ( $error, $headerref) = $db->$action($cgibase, @ARGV, $preset);
if ( $headerref && ($action eq "dumpmeta") ) {
print @$headerref};
}
=item C<setmeta>
Set OSD or beacon meta fields for the database.
=cut
elsif ( $action eq "setmeta" ) {
foreach ( @ARGV ) {
if ( /^#([A-Z][A-Z0-9-]*)(?::\s*(.*))?$/ ) {
my ($key, $val) = ($1, $2);
my $success;
if ( defined $val ) {
$success = $db->setBeaconMeta($key, $val)}
else {
$success = $db->clearBeaconMeta($key)};
print "#$key processed\n" if $success and $verbose;
}
elsif ( /^([A-Z][\w-]+)(?::\s*(.*))?$/ ) {
my ($key, $val) = ($1, $2);
my $success;
if ( defined $val ) {
$success = $db->addOSD($key, $val)}
else {
$success = $db->clearOSD($key)};
print "$key processed\n" if $success and $verbose;
}
else {
warn "cannot parse: >$_<"}
}
}
=item C<loadmeta>
Completely exchanges the OSD and beacon meta fields for the
database by the contents of the file given as first argument.
=cut
elsif ( $action eq "loadmeta" ) {
my $tplfile = shift @ARGV or die "please supply a template file";
open(TPL, "<:utf8", $tplfile) or die "cannot open template file $tplfile";
my ($didosd, $didmeta) = (0, 0);
while ( <TPL> ) {
if ( /^#([A-Z][A-Z0-9-]*):\s*(.*)$/ ) {
my ($key, $val) = ($1, $2);
unless ( $didmeta ++ ) {
$db->clearBeaconMeta($_) foreach $db->beaconfields()};
$db->setBeaconMeta($key, $val) if $val; # set!
}
elsif ( /^([A-Z][\w-]+):\s*(.*)$/ ) {
my ($key, $val) = ($1, $2);
unless ( $didosd ++ ) {
$db->clearOSD($_) foreach $db->osdKeys()};
$db->addOSD($key, $val) if $val; # add!
}
elsif ( /^\s*$/ ) { # don't care
}
else {
warn "cannot parse: >$_<"}
}
close(TPL);
if ( $didosd ) {
refreshOSD(1) unless $keeposd}
elsif ( $didmeta ) {
refreshOSD($force) unless $keeposd};
}
=item C<dumposd>
Shows Open Search Description.
=item C<refreshosd>
Recalculates default Open Search Description (list constituents as
description, give counts, find examples, ...)
=cut
elsif ( ($action eq "dumposd") or ($action eq "refreshosd") ) {
refreshOSD(1) if $action eq "refreshosd";
binmode(STDOUT, ":utf8");
if ( my $resultref = $db->OSDValues() ) {
foreach ( sort keys %$resultref ) {
print "$_ => ";
if ( !defined $resultref->{$_} ) {
print "<undef>"}
elsif ( !ref($resultref->{$_}) ) {
print ">".$resultref->{$_}."<"}
else {
foreach my $item ( @{$resultref->{$_}} ) {
print "\n- ";
if ( !defined $item ) {
print "<undef>"}
else {
print ">$item<"}
}
}
print "\n";
}
};
}
=item header
=cut
elsif ( $action eq "admin" ) {
if ( scalar @ARGV ) {
my $did = 0;
while ( scalar @ARGV && (my $field = shift @ARGV) ) {
my($key, $val) = split(/=/, $field, 2);
my $oldval = $db->admin($key, $val);
$oldval = "{undef}" unless defined $oldval;
my $changed = ((defined $val) && ($oldval ne $val)) ? "old value $oldval changed" : $oldval;
print "$key - $changed\n";
};
}
else {
my $admref = $db->admin();
foreach ( sort keys %$admref ) {
print "$_ : $admref->{$_}\n"}
}
}
=item C<deflate>
Interface to the C<deflate()> method (DEFLATE+REINDEX+ANALYZE the
SQLite database);
=cut
elsif ( $action eq "deflate" ) {
$db->deflate}
=item C<version>
Dump some version numbers
=cut
elsif ( $action eq "version" ) {
print "sasbactrl version $VERSION\n";
print "SeeAlso::Source::BeaconAggregator version $SeeAlso::Source::BeaconAggregator::VERSION\n";
print "SeeAlso::Source::BeaconAggregator::Maintenance version $SeeAlso::Source::BeaconAggregator::Maintenance::VERSION\n";
print "SeeAlso::Identifier version $SeeAlso::Identifier::VERSION\n";
print "SeeAlso::Identifier::GND version $SeeAlso::Identifier::GND::VERSION\n";
}
=item C<noop>
Do nothing (you may want for exit status or use some --pragma's)
=cut
elsif ( $action eq "noop" ) {
print "OK!\n" if $verbose;
}
=back
=cut
else {
pod2usage({-message => "Unsupported action '$action'", -exitval => 3})}
sub refreshOSD { #
# if ( $keeposd ) {
# print "NOT refreshing OSD data\n" if $verbose;
# return 0;
# }
my ($doexamples) = @_;
my $anyref = $db->RepoCols();
my $repcnt = scalar keys %$anyref;
print "Refreshing OSD data from $repcnt sources: ";
print "DateModified... " if $verbose;
$db->setOSD("DateModified", time());
my $gcounti = $db->admin('gcounti');
unless ( $gcounti ) {
$gcounti = $db->idStat(undef, distinct => 0) || 0;
$gcounti && $db->admin('gcounti', $gcounti) unless $keeposd;
};
my $gcountu = $db->admin('gcountu');
unless ( $gcountu ) {
$gcountu = $db->idStat(undef, distinct => 1) || 0;
$gcountu && $db->admin('gcountu', $gcountu) unless $keeposd;
};
print "collect Descriptions... " if $verbose;
my $exclprefix = " [excluding: ";
my %descr;
my $countref = $db->RepoCols("_countu");
my $aliasref = $db->RepoCols("_alias");
my $sortref = $db->RepoCols("_sort");
foreach ( keys %$aliasref ) {
$sortref->{$_} ||= $aliasref->{$_}};
my $textref = $db->RepoCols("NAME");
foreach ( keys %$textref ) {
$descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
$textref = $db->RepoCols("DESCRIPTION");
foreach ( keys %$textref ) {
$descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
$textref = $db->RepoCols("INSTITUTION");
foreach ( keys %$textref ) {
$descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
$textref = $db->RepoCols("FEED");
foreach ( keys %$textref ) {
$descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
$textref = $db->RepoCols("_alias");
foreach ( keys %$textref ) {
$descr{$_} ||= "Listed by $textref->{$_}" if defined $textref->{$_}};
foreach ( keys %$countref ) {
next unless $descr{$_};
if ( $countref->{$_} ) {
$descr{$_} .= " [".$countref->{$_}."]"}
else {
print $exclprefix . ($aliasref->{$_} || $_) if $verbose;
$exclprefix = ", ";
delete $descr{$_};
};
};
print "] ... " if $verbose && $exclprefix eq ", ";
my $rcount = scalar keys %descr;
print "set Descriptions... " if $verbose;
$db->setOSD("Description", "Currently serving $gcountu distinct identifiers from $rcount beacon sources:\n -- "
.join(".\n -- ", map { $descr{$_} } sort { $sortref->{$a} cmp $sortref->{$b} } keys %descr)
);
my $srcref = $db->RepoCols("FEED");
my @vals = map { ($_ && /\S/) ? ($_) : () } values %$srcref;
print "set Sources... " if $verbose;
$db->setOSD("Source", @vals);
unless ( $doexamples ) {
print "(skipping examples) ... done\n";
return 1;
};
return 1 unless $repcnt; # no repositories => no examples
print "Examples from Sources [" if $verbose;
$db->clearOSD("Examples");
my (%seen, %covered);
my $explref = $db->RepoCols("EXAMPLES");
my $max = 0;
foreach my $sq ( sort {$b <=> $a} keys %$explref ) {
my $expl = $explref->{$sq};
next unless defined $expl;
print "$sq:" if $verbose;
my $cnt = 0;
foreach my $id ( sort {$b cmp $a} map { $_ ? $_ : () } split(/\s*\|\s*/, $expl) ) {
print "." if $verbose;
my ($normid, @sequences) = $db->idList($id);
if ( $normid ) { # flush iterator
warn "assertion failed (examples from seqno $sq): multiple lists for $id" while $db->idList(); # Flush iterator
}
else {
print "\n\t[normalizing example >$id< for seqno $sq failed!]\n";
$db->addOSD("Examples", "$id");
warn "assertion failed (examples from seqno $sq): multiple lists for $id" while $db->idList(); # Flush iterator
next;
};
#warn "$sq: $id already seen" if $seen{$normid};
next if $seen{$normid}++;
my ($i, $j, $new) = (0, 0, 0);
foreach my $seqref ( @sequences ) {
$i ++;
$j += ($seqref->[1] || 1);
next if $covered{$seqref->[0]};
#warn "$sq: yet uncovered $seqref->[0] for $id/$normid";
$new = $covered{$seqref->[0]} = 1;
};
#warn "$sq: all covered for $id/$normid" unless $new;
next unless $new;
print "+" if $verbose;
$db->addOSD("Examples", "$id|$i/$j");
$max = $i if $i > $max;
last;
};
print " " if $verbose;
};
print "] ... Examples by coincidence" if $verbose;
my $k = sprintf("%.0f", exp(log($repcnt)*4/5)); # 4 out of 5, 23 out of 50, 40 out of 100
$k = $EXAMPLEGOAL if $EXAMPLEGOAL and ($k > $EXAMPLEGOAL);
print " [goal: clusters of size >$k:" if $verbose;
my $eref;
my $i = $max || 0;
my $j = $i +1;
my $sthcache = "";
print " $i" if $i && $verbose;
while ( ($j <= $k) and (my $tmperef = $db->findExample($j, 0, $sthcache)) ) {
$eref = $tmperef;
$eref->{response} =~ /^(\d+)\b/;
my $this = $1 || $j;
print " $this" if $verbose;
$i = $this;
$j = $this +1;
};
print "= $eref->{id}" if $verbose && $eref;
print "]" if $verbose;
while ( $i ) {
print " ($i)" if $verbose;
if ( $eref ) {
$seen{$eref->{id}} || $db->addOSD("Examples", $eref->{id}."|".$eref->{response})};
last if $i <= $EXAMPLEGOAL; # no more examples if max is max...
$i = $EXAMPLEGOAL if $EXAMPLEGOAL and ($i > $EXAMPLEGOAL);
print "[10?]" if $verbose;
($eref = $db->findExample($i, 10, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response})
: (($eref = $db->findExample($i, 1, $sthcache)) && $db->addOSD("Examples", $eref->{id}."|".$eref->{response}),
last);
print "[100?]" if $verbose;
($eref = $db->findExample($i, 100, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
print "[1000?]" if $verbose;
($eref = $db->findExample($i, 1000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
print "[10000?]" if $verbose;
($eref = $db->findExample($i, 10000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
print "[100000?]" if $verbose;
($eref = $db->findExample($i, 100000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
print "[1000000?]" if $verbose;
($eref = $db->findExample($i, 1000000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
$i = 0;
};
print " done\n";
return 1;
}
sub policy {
my ($st) = @_;
my @xstat; # future events
# 1a. timestamps deleted: force reload
if ( !$st->{"ftime"} ) {
push(@xstat, [$^T, 'FORCE', "!reload forced (no _ftime)"]);
}
elsif ( !$st->{"mtime"} ) {
push(@xstat, [$^T, 'FORCE', "!reload forced (no _mtime)"]);
};
# 1b. force reload by time conditions
my $polmax = $st->{"rtime"} ? $REVISITFORCE : $NOREVISITFORCE;
push(@xstat, [$st->{"ftime"} +$polmax, 'DATE', sprintf("+forced reload policy %.1fw", $polmax/3600/24/7)]) if $st->{"ftime"};
if ( $st->{"rtime"} ) {
# 2. revisit reasoning: Hold/force reload
my $text;
if ( $^T < $st->{"rtime"} ) {
$text = "-Respect"}
elsif ( $st->{"ftime"} && ($st->{"ftime"} < $st->{"rtime"}) ) { # force if never fetched after $rtime
$text = "+Expired"}
else {
$text = "*Stale*"};
my $sxtime = $st->{"stime"} || $st->{"mtime"}; # prefer TIMESTAMP for difference to REVISIT
push(@xstat, [$st->{"rtime"}, 'RVST', sprintf("%s REVISIT hint (%s %s)", $text ,tdist($st->{"rtime"} -$sxtime))]);
if ( $text =~ /Stale/ ) { # additional age reasoning if Stale Revisit hint
# 2a. wait at least 5% of age as concluded from expired revisit hint
my $delta = ($st->{"ftime"} -$st->{"rtime"}) / 20;
push(@xstat, [$st->{"ftime"} +$delta, 'FREQ', sprintf("age policy 5%% (%s %s)", tdist($delta))]);
}
}
elsif ( $st->{"ftime"} ) {
# 3. reasoning on file age and update frequency
if ( my $xstime = $st->{"mtime"} || $st->{"stime"} ) { # prefer "last modified" over #TIMESTAMP
# 3a. wait at least 5% of age as concluded from last fetch
my $delta = ($st->{"ftime"} -$xstime) / 20;
push(@xstat, [$st->{"ftime"} +$delta, 'FREQ', sprintf("age policy 5%% (%s %s)", tdist($delta))]);
};
# 3b. wait at least $NOREVISITWAIT before reload
my $btime = $st->{"ftime"};
# try to get close to mtime
$btime = $st->{"mtime"} if $st->{"mtime"} && ($st->{"ftime"} -$st->{"mtime"} < $NOREVISITWAIT);
push(@xstat, [$btime +$NOREVISITWAIT, 'WAIT', sprintf("revisit policy %.1fh", $NOREVISITWAIT/3600)]);
};
# 4. wait at least $RETRYWAIT before next attempt
push(@xstat, [$st->{"utime"} +$RETRYWAIT, 'WAIT', sprintf("retry policy %.1fh", $RETRYWAIT/3600)]) if $st->{"utime"};
my ($forced, $xtime, $maxtime) = (0, 0, 0);
my @conds;
foreach ( sort {$a->[0] <=> $b->[0]} @xstat ) {
my($stamp, $code, $message) = @$_;
$forced = 1 if $message =~ /^!/;
if ( $stamp < $^T ) { # expired conditions
$forced ||= -1 if $message =~ /^\+/;
push(@conds, [$stamp, $code, $message]);
$xtime = $stamp;
}
else {
push(@conds, [$stamp, $code, $message]);
if ( $message =~ /^\+/ ) {
$maxtime ||= $stamp}
elsif ( $maxtime ) {
$xtime = $maxtime}
else {
$forced = 0 if $forced < 0;
$xtime = $stamp;
}
}
};
return ($forced, $xtime, \@conds);
}
sub lct {
return "---" unless $_[0];
if ( $_[0] =~ /^\d+(\.\d+)?$/ ) {
return strftime("%a, %F %T %z", localtime($_[0]))}
else {
return strftime("%a, %F %T %z", localtime(HTTP::Date::str2time($_[0], "GMT")))};
}
sub tdist {
local($_) = @_;
my $vz = "+";
if ( $_ < 0 ) {
$_ = -$_;
$vz = "-";
};
my @elems = ();
unshift(@elems, ($_ % 60)."s");
$_ = int($_ / 60) or return vzfix($vz, @elems);
unshift(@elems, ($_ % 60)."m");
$_ = int($_ / 60) or return vzfix($vz, @elems);
unshift(@elems, ($_ % 24)."h");
$_ = int($_ / 24) or return vzfix($vz, @elems);
my ($y, $m, $w);
if ( $_ > 365 * 1.5) {
$y = int($_ / 365); $_ %= 365};
if ( $_ > 30 * 1.5 ) {
$m = int($_ / 30); $_ %= 30};
if ( $_ > 7 * 1.5 ) {
$w = int($_ / 7); $_ %= 7};
unshift (@elems, $_."d") if $_;
unshift (@elems, $w."w") if $w;
unshift (@elems, $m."M") if $m;
unshift (@elems, $y."Y") if $y;
return vzfix($vz, @elems);
}
sub vzfix {
my $vz = shift @_;
substr($_[0], 0, 0) = $vz;
return (@_, "", "", "");
}
=head1 AUTHOR
Thomas Berger
CPAN ID: THB
gymel.com
THB@cpan.org
=head1 COPYRIGHT
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=head1 SEE ALSO
SeeAlso::Source::BeaconAggregator::Maintenance
SeeAlso::Source::BeaconAggregator::Publisher
=cut
1;
### THE END ###