The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
# $Id: $
use lib "./lib";
use strict;
use utf8;
use warnings;
use POSIX qw(strftime);
use HTTP::Date;
use SeeAlso::Identifier;
use SeeAlso::Source::BeaconAggregator::Maintenance;
use Getopt::Long;
use Pod::Usage;

our $VERSION = '0.2_83';

##### Compatibility
use SeeAlso::Identifier::GND;
if ( $SeeAlso::Identifier::GND::VERSION eq "0.54" ) {
    no strict;
    eval <<'XxX' or die "tweak failed: $@";
#!!! Have to tweak internals of SeeAlso::Identifier::GND for successful operation !!!
    *SeeAlso::Identifier::GND::canonical = \&SeeAlso::Identifier::GND::normalized;
    *SeeAlso::Identifier::GND::hash = \&SeeAlso::Identifier::GND::indexed;
#!!! !!!#
XxX
}
#####

my $verbose = 1;
my $ignore_header_errors = 0;
my $action = "list";
my $dbroot = "./beacondbs";
my ($dsn, $cache_size, @pragmas, @presets);
my ($distinct, $force, $keeposd, $quiet, $dbfile, $itype, $redirs);

#  Application constants
our $EXAMPLEGOAL = 100;             # don't attempt to find examples with higher hit count

our $RETRYWAIT = 213*60;            # w/o #REVISIT hint: never attempt more often (3h33')
our $NOREVISITWAIT = 1344*60;       # w/o #REVISIT hint: never revisit sooner (22h22')

our $REVISITFORCE = 4*30*24*3600;   # with #REVISIT hint: force revisit after (4m)
our $NOREVISITFORCE = 4*7*24*3600;  # w/o #REVISIT hint: force revisit after (4w)

=head1 NAME

sasbactrl - command line interface to SeeAlso::Source::BeaconAggregator and
            auxiliary classes

=head1 SYNOPSIS

 sasbactrl [Options] [Action [Args ...]]

Actions and Arguments:
  init [--[no]redirs] 
       [--itype type] Set up Database for dsn (dbroot must exist)

  load alias [uri [file]]
  update  [pattern | seqno_or_alias [uri]]

  status  [seqno/alias/pattern]     parts of "list" and "headers"

  unload  seqno/alias/pattern
  purge   seqno/alias/pattern
  list    [seqno/alias/pattern]

  headers [seqno/alias/pattern]
  header  seqno/alias/pattern [field=val ...]

  dumposd     show OpenSearchDescription
  refreshosd  refresh (then show) OpenSearchDescription

  dumpmeta [cgi_base_url ...]     show beacon header
  beacon cgi_base_url [uAformatname]

  loadmeta  header_template_file        load OSD and Beacon meta constants

  idststat [seqno/alias/pattern]
  idcounts [identifier/pattern]
  idlist [identifier/pattern]

  admin [field[=value]]      list/get/set admin properties:
     DATA_VERSION
     IDENTIFIER_CLASS

  deflate       Apply VACUUM and REINDEX on SqLite Db 

  version       gives versions numbers of involved modules

  noop          do nothing (e.g. if --pragma's do all the work)

Options:
  (--dbfile | --dsn )
  --force
  --keeposd
  --verbose|--quiet
  --itype
  --dbroot     ($dbroot)
  --ignore-header-errors  [for update and load]
  --distinct  [for idcounts and idstat]
  --preset HEADER=value  [for "beacon", repeatable]

  --pragma [SQLite pragma]  (repeatable)
  --cache_size  [SQLite pragma value]
=cut

=head1 OPTIONS

=head2 Global options for (almost) all types of actions

=over 8

=item B<--dbfile> I<filename>

Name of database file relative to C<dbroot>

=item B<--dsn> I<basename>

Database file will be the file I<basename-db> in folder I<basename>,
giving SQLite enough headroom for temporary files. 
Relative to C<dbroot>.


=item B<--dbroot>

Prefix for --dbfile or --dsn (relative paths only)


=item B<--force>

Use with care.

=item B<--verbose>

Sometimes shows more.

=item B<--quiet>

Sometimes shows less.

=item B<--itype>

Identifier type (corresponding subclass of SeeAlso::Identifier must be installed)

=item B<--redirs=[0|1]>

Prepare additional redirection index

=item --ignore-header-errors

[for update and load]

=item --distinct

[for idcounts and idstat]

=back

=cut

GetOptions (
  'verbose+' => \$verbose,
  'quiet' => \$quiet,
  'distinct' => \$distinct,
  'force' => \$force,
  'keeposd' => \$keeposd,
  'ignore-header-errors' => \$ignore_header_errors,
  'redirs!' => \$redirs,
  'itype=s' => \$itype,
  'dbfile=s' => \$dbfile,
  'dbroot=s' => \$dbroot,
  'cache_size=s' => \$cache_size,
  'pragma=s' => \@pragmas,
  'preset=s' => \@presets,
  'dsn=s' => \$dsn,
) or pod2usage({-message => 'Invalid option encountered', -exitval => 2});

if ( @ARGV && $ARGV[0] !~ /^-/ ) {
    $action = shift @ARGV;
    $action =~ s/\s$//;
}

$verbose = 0 if $quiet;
$dbroot ||= "";
$dbfile ||= "";
$dsn ||= "";

local($|) = 1 if $verbose;

my %passopts = (
  ($distinct ? ( 'distinct' => $distinct) : ()),
  ($force ? ( 'force' => $force) : ()),
  ($verbose ? ( 'verbose' => $verbose) : ()),
  ($ignore_header_errors ? ( 'ignore-header-errors' => $ignore_header_errors) : ()),
);


if ( $dsn ) {
    die "dbroot directory $dbroot does not exist" unless -d $dbroot;
    $dbroot .= "/" unless $dbroot =~ m!/$!;
    if ( -d $dbroot && ( ! -d "$dbroot$dsn" ) ) {
        if ( $action eq "init" ) {
            print "Will create database ${dsn}-db in $dbroot$dsn";
            mkdir "$dbroot$dsn" or die "failed creating database dir $dbroot$dsn";
          }
        else {
            die "no database initialised for dsn $dsn in $dbroot tree"};
      }
  }      

$itype ||= "";
my $iclass;
if ( $itype ) {
    my $package = "SeeAlso::Identifier::".$itype;
    eval {
        (my $pkgpath = $package) =~ s=::=/=g;  # require needs path...
        require "$pkgpath.pm";
        import $package;
      };
    if ( $@ ) {
        die "sorry: Identifier Class $package cannot be imported\n$@"};
    $iclass = $package->new();    # don't test the outcome (blessed scalar!)
    die "sorry: cannot setup instance for Identifier Class $package" unless defined $iclass;
  }
else {
    # $iclass = SeeAlso::Identifier->new();   # don't set, let it auto-initialize
  };

my $db = SeeAlso::Source::BeaconAggregator::Maintenance->new(
        'dbroot' => $dbroot,
        $dbfile ? ('file' => $dbfile) : (),
        $dsn ? ('dsn' => $dsn) : (),
        ref($iclass) ? ('identifierClass' => $iclass) : (),
        'accept' => {
		'VERSION' => qr'0\.[12]',
		 'FORMAT' => '^([GP]ND-)?B(?i:EACON)$'},
        'verbose' => $verbose,
    ) or die "could not open database $dsn / $dbfile";

$db->{dbh}->do("PRAGMA cache_size = $cache_size;") or die "could not set cache_size to $cache_size" if defined $cache_size;

foreach my $pragma ( @pragmas ) {
    $pragma =~ s/^(['"])(.+)\1$/$2/;
    $pragma =~ s/\s*;\s*$//;
#   print "execute 'PRAGMA $pragma;':\n" if $verbose;
    my $prows = $db->{dbh}->selectall_arrayref("PRAGMA $pragma;") or warn "trouble executing 'PRAGMA $pragma;'";
#   my $i;
    foreach my $rowref ( @$prows ) {
#       warn "[".++$i."] - "."@$rowref\n";
        print "@$rowref\n";
      };
  };

=head2 Actions

These are typically an interface to methods of SeeAlso::Source::BeaconAggregator::Maintenance
or of SeeAlso::Source::BeaconAggregator::Publisher

=over 8

=item C<init>

Creates a database with the necessary tables

=cut

if ( $action eq "init" ) {
    $db->init(
       ((defined $redirs) ? ( 'prepareRedirs' => $redirs) : ()),
    ) or die("could not setup database structure")}

=item C<list>

Interface to the C<listCollections()> method.

List all loaded beacon files with identifier counts.

The optional parameter may specify a SQL-Pattern to restrict
the result.

=cut

elsif ( $action eq "list" ) {
    my $seqno_or_alias = shift @ARGV;
    while (my ($sq, $alias, $uri, $mtime, $counti, $countu) = $db->listCollections($seqno_or_alias)) {
        my $mt = lct($mtime);
        $counti ||= 0;
        $countu ||= 0;
        my $count = ($counti == $countu) ? $countu : "$countu/$counti";
        print "$sq\t$alias\t$mt\t($count)\n\t$uri\n";
      };
  }

=item C<status>

Prints the complete administrative fields and beacon fields and overview
of the revisit policy for the beacon source(s) given by the first argument.

The first argument may be empty or a pattern to produce the information for
more than one source.

=cut

elsif ( $action eq "status" ) {
    my $seqno_or_alias = shift @ARGV;
    binmode(STDOUT, ":utf8");

    printf("Load status for %s (%s)\n", $dsn, lct($^T));

    while ( my ($hashref, $metaref) = $db->headers($seqno_or_alias) ) {
        last unless defined $hashref;
        printf("\n#%u\t%s\t%s\t%s\t(%u/%u i/u)\n", $metaref->{_seqno},
                                          $metaref->{_alias} || "",
                                          $metaref->{_sort} || "",
                                          $metaref->{_mtime} || "???",
                $metaref->{_counti} || 0, $metaref->{_countu} || 0);

        printf(" < \t%s\n", $hashref->{FEED}) if $hashref->{FEED};
        printf(" << \t%s\n", $metaref->{_uri} || "???") unless $hashref->{FEED} && $metaref->{_uri} && ($hashref->{FEED} eq $metaref->{_uri});
        printf(" <<< \t%s\n", $metaref->{_ruri} || "???") unless $metaref->{_uri} && $metaref->{_ruri} && ($metaref->{_uri} eq $metaref->{_ruri});
        printf(" ! \t%s\n", $metaref->{_admin}) if $metaref->{_admin};

        my @dates;
        my %stamps;
        if ( $stamps{"stime"} = HTTP::Date::str2time($hashref->{TIMESTAMP}, "GMT") || 0 ) {
            push(@dates, [2, "TIMESTAMP header", $stamps{"stime"}, ""])}
        else {
            push(@dates, [0, "[no TIMESTAMP header]", 0, ""])};

        if ( $stamps{"rtime"} = HTTP::Date::str2time($hashref->{REVISIT}, "GMT") || 0 ) {
            push(@dates, [2, "REVISIT header", $stamps{"rtime"}, ""])}
        else {
            push(@dates, [8, "[no REVISIT header]", 0, ""])};
        
        $stamps{"mtime"} = HTTP::Date::str2time($metaref->{_mtime}, "GMT") || 0;
        push(@dates, [2, "Current copy modified", $stamps{"mtime"}, ""]);

        $stamps{"ftime"}= HTTP::Date::str2time($metaref->{_ftime}, "GMT") || 0;
        my $fstat = $metaref->{_fstat};
        push(@dates, [2, "Current copy loaded", $stamps{"ftime"}, $fstat]);

        $stamps{"utime"} = HTTP::Date::str2time($metaref->{_utime}, "GMT") || 0;
        my $ustat = $metaref->{_ustat};
        push(@dates, [2, "Last update attempt", $stamps{"utime"}, $ustat]);

        my($forced, $xtime, $statref) = policy(\%stamps);
        
        my $xstime = $stamps{"mtime"} || $stamps{"stime"};
        printf(" - Age: %s %s %s\n", tdist($^T -$xstime));
        foreach ( @$statref ) {
            my ($stamp, undef, $message) = @$_;
            my $sign = ($stamp < $^T) ? "+" : "-";
            $sign = "o" if ($sign eq "-") and ($message =~ /^\+/);
            printf(" (%s) Reload condition: %s: %s %s\n", $sign, $message, tdist($stamp -$^T));
            next if $stamp < $^T;         # dont list past events
            next if $message =~ /^-/;     # suppress REVISIT pleonasm
            $message =~ s/ policy.*$//;
            $message =~ s/^\W//;
            push(@dates, [2, $message, $stamp, ""]);
          };

        if ( $forced > 0 ) {
            push(@dates, [9, "[Reload forced]", 0, ""])}
        elsif ( $xtime > $^T ) {
            push(@dates, [9, "=> next revisit after", $xtime, ""])}
        elsif ( $forced ) {
            push(@dates, [9, "=> Ready for reload!", 0, ""])}
        else {
            push(@dates, [9, "=> Ready for revisit", 0, ""])};

        foreach my $lineref ( sort { $a->[0] <=> $b->[0]
                                  || $a->[2] <=> $b->[2]
                                  || $a->[1] cmp $b->[1]
                                   } @dates ) {
            if ( my $stat = $lineref->[3] ) {
                $stat =~ s/replaced/rpl/;
                $stat =~ s/deleted/del/;
                $stat =~ s/duplicate/dup/;
                $stat =~ s/ignored/ign/;
                $stat =~ s/invalid/inv/;
                printf(" * %-24s %30s\t%s %s %s\n **\t[%s]\n", $lineref->[1], lct($lineref->[2]), ($lineref->[2] ? @{[tdist($lineref->[2] -$^T)]}[0..2] : ("", "", "")), $stat);
              }
            else {
                printf(" * %-24s %30s\t%s %s %s\n", $lineref->[1], lct($lineref->[2]), ($lineref->[2] ? tdist($lineref->[2] -$^T) : ("", "", "")));
              };
          };
      }
  }

=item C<headers>

Interface to the C<headers()> method.

Prints all OSD fields and then all beacon fields.

The first argument is a sequence number, alias or pattern.

See C<header> for restricting output to individual fields.

=cut

elsif ( $action eq "headers" ) {
    my $seqno_or_alias = shift @ARGV;
    binmode(STDOUT, ":utf8");

    while ( my ($hashref, $metaref) = $db->headers($seqno_or_alias) ) {
        last unless defined $hashref;
        print "\n";
        while (my ($key, $val) = each %$hashref ) {
            next unless defined $val && $val && $val !~ /^\s+$/;
            print "#$key: $val\n";
          };
        while (my ($key, $val) = each %$metaref ) {
####### TEMP CODE _count #########
            next if $key eq "_count";
            next unless defined $val;
            print "$key - $val\n";
          };
        print "\n";
      };
  }


=item C<header>

Interface to the C<headerfield()> method.

The first parameter is a mandatory and specifies a sequence number,
alias or pattern (e.g. "C<%>") to operate on.

Further arguments are of the form I<fieldname> or I<fieldname>=C<value>
(value may be empty):
For each of these arguments output a line consisting of the field name, " - "
and the list of all values encountered. Subsequently, if the assignment form of the
argument is given, the corresponding field is set to C<value> for all
sequences met.

Otherwise (no list of field names or assignments is specified), output will be
a list of all I<alias> fields, roughly corresponding to C<headerfield "%" _alias>.

See C<headers> for listing all fields defined for given sequences.

=cut

elsif ( $action eq "header" ) {
    my $seqno_or_alias = shift @ARGV or die "Please specify alias or number";
    if ( @ARGV ) {
        my $did = 0;
        while ( scalar @ARGV && (my $field = shift @ARGV) ) {
            my($key, $val) = split(/=/, $field, 2);
            my($rows, @vals) = $db->headerfield($seqno_or_alias, $key, $val);
            print "$key - @vals\n" if @vals && $verbose;
            $did ++ if (defined $val) && $rows && (defined $vals[0]) && ($val ne $vals[0]);
          };
        if ( $did ) {
            print "Changed $did fields for $seqno_or_alias\n";
            refreshOSD(0) unless $keeposd;
          };
      }
    else {       # Simply dump a list of aliases
       my($rows, @vals) = $db->headerfield($seqno_or_alias, "_alias");
       print "@vals\n"
      };
  }

=item C<load>

  load I<alias>, I<uri>, I<datafile>

Interface to the C<loadFile()> or C<update()> methods

Load beacon data from Uri (if I<uri> is given) or physical file
from disk (I<datafile> must be given). 

The beacon file will under the alias I<alias> be ready for 
later updates.


=cut

elsif ( $action eq "load" ) {
    my $alias = shift @ARGV || "";
    my $uri = shift @ARGV || "";
    my ($cn, $cc);
    if ( my $datafile = shift @ARGV ) {
        ($cn, $cc) = $db->loadFile($datafile, {_alias => $alias, _uri => $uri}, %passopts)}
    elsif ( $uri ) {
        ($cn, $cc) = $db->update($alias || $uri, {_uri => $uri}, %passopts)}
    else {
        die ("Please specify an uri and optionally a beacon data file")};
    die "Load failed\n" unless $cn;
    print "Collection Number $cn ($cc records)\n";
    refreshOSD($force) unless $keeposd;
  }

=item C<update>

Interface to the C<update()> method

(Re)load beacon files with known URIs according
to the following policy: Respect the #REVISIT hint, assume a #REVISIT
period of 24h for beacon files without. Wait at least 5% of the file's
age (difference between last updateate attempt and modification time)
before trying again. Wait at least 4h anyway. However force reload
after a couple of months.

The C<update> method itself detects illegal headers and if the 
beacon file was modified at all and may skip loading the file.
Use the C<--force> option to override the test on modification time
or clear the C<_mtime> field.

When called with no arguments, update B<all> beacon files.

=cut

elsif ( $action eq "update" ) {
    my $seqno_or_alias = shift @ARGV;
    my $did;
    if ( $seqno_or_alias ) {
        my $uri = shift @ARGV || "";
        if ( $uri ) {
            die "no wildcards allowed in seqno_or_alias when uri is provided: $seqno_or_alias" if $seqno_or_alias =~ /%/;
            print "\n==> $seqno_or_alias ($uri) <==\n" if $verbose;
            my ($cn, $cc) = $db->update($seqno_or_alias, {_uri => $uri}, %passopts);
            if ( $cn ) {
                $did ++;
                print "Collection Number $cn ($cc records)\n" if $cn;
              }
          }
        else {
            my $aliasref = $db->RepoCols("_alias", $seqno_or_alias);
            foreach ( sort {$a <=> $b} keys %$aliasref ) {
                my $item = $aliasref->{$_};
                print "\n==> $item (auto) <==\n" if $verbose;
                my ($cn, $cc) = $db->update($item, {}, %passopts);
                if ( $cn ) {
                    printf "%2u: %s successfully updated (new SeqNo %u, %u records)\n", ++$did, $aliasref->{$_}, $cn, $cc}
              };
          };
      }
    else {
        my $aliasref = $db->RepoCols("_alias");
        my $uriref = $db->RepoCols("_uri");
        my ($revisitref, $updateref, $fetchedref, $modifiedref, $timestampref);

        $revisitref = $db->RepoCols("REVISIT") || 0;
        $updateref = $db->RepoCols("_utime") || 0;
        $fetchedref = $db->RepoCols("_ftime") || 0;
        $modifiedref = $db->RepoCols("_mtime") || 0;
        $timestampref = $db->RepoCols("TIMESTAMP") || 0;

        foreach ( sort {$a <=> $b} keys %$aliasref ) {
            print $verbose ? "\n==> $aliasref->{$_} <==\n" : sprintf("%-10s - ", $aliasref->{$_});
            unless ( $uriref->{$_} ) {
                print "no uri known -> skipped\n";
                next;
              };
            my %stamps = (
                "stime" => $timestampref->{$_},
                "mtime" => $modifiedref->{$_},
                "ftime" => $fetchedref->{$_},
                "utime" => $updateref->{$_},
                "rtime" => $revisitref->{$_},
              );
            my $xstime = $stamps{"mtime"} || $stamps{"stime"}; # prefer "last modified" in favor of #TIMESTAMP
            my $age = join("", (tdist($^T -$xstime))[0,1]);

            my @states;
            push(@states, sprintf("[%-26s %30s]\n", "Current Revisit hint", lct($stamps{"rtime"}))) if $stamps{"rtime"};
            push(@states, sprintf("[%-26s %30s]\n", "Last action performed", lct($stamps{"utime"}))) if $stamps{"utime"};
            push(@states, sprintf("[%-26s %30s]\n", "Current copy loaded", lct($stamps{"ftime"}))) if $stamps{"ftime"};
            push(@states, sprintf("[%-26s %30s]\n", "Current copy modified", lct($stamps{"mtime"}))) if $stamps{"mtime"};
            push(@states, sprintf("[%-26s %30s]\n", "Current timestamp header", lct($stamps{"stime"}))) if $stamps{"stime"};

            my $forcereload;
            unless ( $force ) {
                print @states if $verbose;

                my ($xtime, $statref);
                ($forcereload, $xtime, $statref) = policy(\%stamps);
                unless ( $forcereload ) {
                    my($status, $info) = ("" x 2);
                    foreach ( @$statref ) {
                        my ($stamp, $code, $message) = @$_;
                        next if $stamp < $^T;
                        next if $message =~ /^\+/;
                        $message =~ s/^\W//;
                        $message =~ s/ policy//;
                        ($status, $info) = ($code, $message);
                        if ( $verbose ) {
                            printf "[%4s] %-20s %30s -> skipped\n", $status, $info, lct($stamp)}
                      };

                    if ( $xtime > $^T ) {
                        unless ( $verbose ) {
                        printf "[%4s] next %30s (age: %s, %s)\n", $status, lct($xtime), $age, $info};
                        next;
                      };
                  }
              };

            if ( !$verbose ) {
                printf("[%4s] curr %30s (age: %s, %s)\n", "LOAD", lct(($stamps{"stime"} && ($stamps{"stime"} < $stamps{"mtime"})) ? $stamps{"stime"} : $stamps{"mtime"}), $age, "will reload");
              }
            elsif ( $force ) {
                print @states};
            my ($cn, $cc) = $db->update($aliasref->{$_}, {}, %passopts, ($forcereload ? ( 'force' => 1) : ()), 'nostat' => 1);
            if ( $cn ) {
                printf "%2u: %s successfully updated (new SeqNo %u, %u records)\n", ++$did, $aliasref->{$_}, $cn, $cc;
#               refreshOSD(0) unless $keeposd;     # will be done anyway since $did true now
              };
            print " ***\n";
          };
      };
    if ( $did or $force ) {
        refreshOSD( $force ? 1 : 0)};
  }

=item C<purge>

Interface to the <purge()> method.

Clears all beacon data for the seqnos or aliases given by
the first argument: The beacon source with its metadata
remains known to the database.

The argument may be a pattern, but the C<--force>
option is needed if actually more than one beacon file is
to be purged).

OSD fields are rebuild if something was done or C<--force>
is given.

=item C<unload>

Interface to the <unload()> method.

Clears all beacon data B<and> removes the listed headers
for the seqnos or aliases given by the first argument:
Any knowledge of the beacon source is effectively deleted
from the database.

The argument may be a pattern, but the C<--force>
option is needed if actually more than one beacon file is
to be unloaded.

OSD fields are rebuild if something was done or C<--force>
is given.

=cut
elsif ( ($action eq "unload") or ($action eq "purge") ) {
    my $seqno_or_alias = shift @ARGV or die "Please specify alias, pattern or SeqNo";
    if ( $db->$action($seqno_or_alias, %passopts) ) {
        refreshOSD(1) unless $keeposd}
    else {
        warn "did not do anything\n";
        refreshOSD($force) if $force and !$keeposd;
      };
  }

=item C<idstat>

Interface to the C<idStat()> method. 

Counts indentifiers, optionally only from those beacon files with seqno or alias restricted by the first argument.

Recognized options are C<distinct> and C<verbose>.

=cut

elsif ( $action eq "idstat" ) {
    print $db->idStat($ARGV[0], %passopts)." identifiers\n";}

=item C<idcounts>

Interface to the C<idCounts()> method. 

Counts identifiers, optionally only these identifiers which
match the pattern given by the first argument.

Recognized options are C<distinct> and C<verbose>.


=cut

elsif ( $action eq "idcounts" ) {
    while (my (@list) = $db->idCounts($ARGV[0], %passopts)) {
        print "@list\n"};
  }

=item C<idlist>

Interface to the C<idList()> method. 

For each identifier list a complicated list of entries found.
Optionally only these identifiers which match the pattern given 
by the first argument are shown.

=cut

elsif ( $action eq "idlist" ) {
    binmode (STDOUT, ":utf8");
    while (my (@list) = $db->idList(@ARGV)) {
        my $id = shift @list;
        print "$id";
        local($") = ",";
        foreach my $rowref ( @list ) {
            pop @$rowref while @$rowref && (! defined $rowref->[$#{$rowref}]);
            my @pretty = map { (defined $_) ? $_ : "" } @$rowref;
            print "\t@pretty";
          }
        print "\n";
      };
  }

=item C<dumpmeta>

Interface to the C<dumpmeta> method.

Lists all header fields from the database.


=item C<beacon>

Interface to the C<beacon()> method.

Produces a beacon file (header fields plus beacon entries)
for the database.

The Base URL for the service to be denoted is mandatory as
first parameter.

=cut

elsif ( ($action eq "dumpmeta") or ($action eq "beacon") ) {
    binmode(STDOUT, ":utf8");
    my $cgibase = shift @ARGV;
    unless ( $cgibase ) {
        if ( $action eq "beacon" ) {
            die "ERROR: You MUST provide a cgi base address\n"}
        else {
            warn "WARNING: You will have to provide a cgi base address\n"}
      };
    require SeeAlso::Source::BeaconAggregator::Publisher or die "could not require Publisher extension";
    SeeAlso::Source::BeaconAggregator::Publisher->activate();   # "recast" all objects
    my $preset = {'FORMAT' => 'PND-BEACON'};
    foreach my $pset ( @presets ) {
        $pset =~ s/^(['"])([^=]+)\1\s*=/$2=/;
        $pset =~ s/=\s*(['"])([^=]+)\1$/=$2/;
        $pset =~ s/^(['"])(.+)\1$/$2/;
        if ( $pset =~ /^([A-Z]+)\s*=\s*(.*)$/ ) {
             $preset->{$1} = $2}
        else {
             warn "ignoring --preset $pset\n"};
      }
    my ( $error, $headerref) = $db->$action($cgibase, @ARGV, $preset);
    if ( $headerref && ($action eq "dumpmeta") ) {
        print @$headerref};
  }

=item C<loadmeta>

Completely exchanges the OSD and beacon meta files for the 
database by the contents of the file given as first argument.

=cut

elsif ( $action eq "loadmeta" ) {
     my $tplfile = shift @ARGV or die "please supply a template file";
     open(TPL, "<:utf8", $tplfile) or die "cannot open template file $tplfile";
     my ($didosd, $didmeta) = (0, 0);
     while ( <TPL> ) {
         if ( /^#([A-Z][A-Z0-9-]*):\s*(.*)$/ ) {
             my ($key, $val) = ($1, $2);
             unless ( $didmeta ++ ) {
                 $db->clearBeaconMeta($_) foreach $db->beaconfields()};
             $db->setBeaconMeta($key, $val) if $val;    # set!
           }
         elsif ( /^([A-Z][\w-]+):\s*(.*)$/ ) {
             my ($key, $val) = ($1, $2);
             unless ( $didosd ++ ) {
                 $db->clearOSD($_) foreach $db->osdKeys()};
             $db->addOSD($key, $val) if $val;           # add!
           }
         elsif ( /^\s*$/ ) {                            # don't care
           }
         else {
             warn "cannot parse: >$_<"}
       }
     close(TPL);
     if ( $didosd ) {
         refreshOSD(1) unless $keeposd}
     elsif ( $didmeta ) {
         refreshOSD($force) unless $keeposd};
  }

=item C<dumposd>

Shows Open Search Description.

=item C<refreshosd>

Recalculates default Open Search Description (list constituents as
description, give counts, find examples, ...)


=cut
elsif ( ($action eq "dumposd") or ($action eq "refreshosd") ) {
    refreshOSD(1) if $action eq "refreshosd";
    binmode(STDOUT, ":utf8");
    if ( my $resultref = $db->OSDValues() ) {
        foreach ( sort keys %$resultref ) {
            print "$_ => ";
            if ( !defined $resultref->{$_} ) {
                print "<undef>"}
            elsif ( !ref($resultref->{$_}) ) {
                print ">".$resultref->{$_}."<"}
            else {
                foreach my $item ( @{$resultref->{$_}} ) {
                    print "\n- ";
                    if ( !defined $item ) {
                        print "<undef>"}
                    else {
                        print ">$item<"}
                  }
              }
            print "\n";
          }
      };
  }

=item header

=cut

elsif ( $action eq "admin" ) {
    if ( scalar @ARGV ) {
        my $did = 0;
        while ( scalar @ARGV && (my $field = shift @ARGV) ) {
            my($key, $val) = split(/=/, $field, 2);
            my $oldval = $db->admin($key, $val);
            $oldval = "{undef}" unless defined $oldval;
            my $changed = ((defined $val) && ($oldval ne $val)) ? "old value $oldval changed" : $oldval;
            print "$key - $changed\n";
          };
      }
    else {
        my $admref = $db->admin();
        foreach ( sort keys %$admref ) {
            print "$_ : $admref->{$_}\n"}
      }
  }

=item C<deflate>

Interface to the C<deflate()> method (DEFLATE+REINDEX+ANALYZE the
SQLite database);

=cut

elsif ( $action eq "deflate" ) {
    $db->deflate}


=item C<version>

Dump some version numbers

=cut

elsif ( $action eq "version" ) {
    print "sasbactrl version $VERSION\n";
    print "SeeAlso::Source::BeaconAggregator version $SeeAlso::Source::BeaconAggregator::VERSION\n";
    print "SeeAlso::Source::BeaconAggregator::Maintenance version $SeeAlso::Source::BeaconAggregator::Maintenance::VERSION\n";
    print "SeeAlso::Identifier version $SeeAlso::Identifier::VERSION\n";
    print "SeeAlso::Identifier::GND version $SeeAlso::Identifier::GND::VERSION\n";
  }

=item C<noop>

Do nothing (you may want for exit status or use some --pragma's)

=cut

elsif ( $action eq "noop" ) {
    print "OK!\n" if $verbose;
}

=back

=cut

else {
    pod2usage({-message => "Unsupported action '$action'", -exitval => 3})}



sub refreshOSD {    # 
#    if ( $keeposd ) {
#        print "NOT refreshing OSD data\n" if $verbose;
#        return 0;
#      }
    my ($doexamples) = @_;
    my $anyref = $db->RepoCols();
    my $repcnt = scalar keys %$anyref;
    print "Refreshing OSD data from $repcnt sources: ";

    print "DateModified... " if $verbose;
    $db->setOSD("DateModified", time());

    my $gcounti = $db->admin('gcounti');
    unless ( $gcounti ) {
        $gcounti = $db->idStat(undef, distinct => 0) || 0;
        $gcounti && $db->admin('gcounti', $gcounti) unless $keeposd;
      };
    my $gcountu = $db->admin('gcountu');
    unless ( $gcountu ) {
        $gcountu = $db->idStat(undef, distinct => 1) || 0;
        $gcountu && $db->admin('gcountu', $gcountu) unless $keeposd;
      };

    print "collect Descriptions... " if $verbose;
    my %descr;
    my $countref = $db->RepoCols("_countu");
    my $aliasref = $db->RepoCols("_alias");
    my $sortref = $db->RepoCols("_sort");
    foreach ( keys %$aliasref ) {
        $sortref->{$_} ||= $aliasref->{$_}};
    my $textref = $db->RepoCols("NAME");
    foreach ( keys %$textref ) {
        $descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
    $textref = $db->RepoCols("DESCRIPTION");
    foreach ( keys %$textref ) {
        $descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
    $textref = $db->RepoCols("INSTITUTION");
    foreach ( keys %$textref ) {
        $descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
    $textref = $db->RepoCols("FEED");
    foreach ( keys %$textref ) {
        $descr{$_} ||= $textref->{$_} if defined $textref->{$_}};
    $textref = $db->RepoCols("_alias");
    foreach ( keys %$textref ) {
        $descr{$_} ||= "Listed by $textref->{$_}" if defined $textref->{$_}};
    foreach ( keys %$countref ) {
        next unless $descr{$_};
        if ( $countref->{$_} ) {
            $descr{$_} .= " [".$countref->{$_}."]"}
        else {
            print " [excluding ".($aliasref->{$_} || $_)."] " if $verbose;
            delete $descr{$_};
          };
      };
    my $rcount = scalar keys %descr;
    print "set Descriptions... " if $verbose;
    $db->setOSD("Description", "Currently serving $gcountu distinct identifiers from $rcount beacon sources:\n -- "
                               .join(".\n -- ", map { $descr{$_} } sort { $sortref->{$a} cmp $sortref->{$b} } keys %descr)
               );

    my $srcref = $db->RepoCols("FEED");
    my @vals =  map { ($_ && /\S/) ? ($_) : () } values %$srcref;
    print "set Sources... " if $verbose;
    $db->setOSD("Source", @vals);

    unless ( $doexamples ) {
        print "(skipping examples) ... done\n";
        return 1;
      };

    return 1 unless $repcnt;     # no repositories => no examples

    print "Examples from Sources [" if $verbose;
    $db->clearOSD("Examples");
    my (%seen, %covered);
    my $explref = $db->RepoCols("EXAMPLES");
    my $max = 0;
    foreach my $sq ( sort {$b <=> $a} keys %$explref ) {
        my $expl = $explref->{$sq};
        next unless defined $expl;
        print "$sq:" if $verbose;
        my $cnt = 0;
        foreach my $id ( sort {$b cmp $a} map { $_ ? $_ : () } split(/\s*\|\s*/, $expl) ) {
            print "." if $verbose;
            my ($normid, @sequences) = $db->idList($id);
            if ( $normid ) {   # flush iterator
                warn "assertion failed (examples from seqno $sq): multiple lists for $id" while $db->idList(); # Flush iterator
              }
            else {
                print "\n\t[normalizing example >$id< for seqno $sq failed!]\n";
                $db->addOSD("Examples", "$id");
                warn "assertion failed (examples from seqno $sq): multiple lists for $id" while $db->idList(); # Flush iterator
                next;
              };
#warn "$sq: $id already seen" if $seen{$normid};
            next if $seen{$normid}++;
            my ($i, $j, $new) = (0, 0, 0);
            foreach my $seqref ( @sequences ) {
                $i ++;
                $j += ($seqref->[1] || 1);
                next if $covered{$seqref->[0]};
#warn "$sq: yet uncovered $seqref->[0] for $id/$normid";
                $new = $covered{$seqref->[0]} = 1;
              };
#warn "$sq: all covered for $id/$normid" unless $new;
            next unless $new;
            print "+" if $verbose;
            $db->addOSD("Examples", "$id|$i/$j");
            $max = $i if $i > $max;
            last;
         };
        print " " if $verbose;
      };

    print "] ... Examples by coincidence" if $verbose;
    my $k = sprintf("%.0f", exp(log($repcnt)*4/5));  # 4 out of 5, 23 out of 50, 40 out of 100
    $k = $EXAMPLEGOAL if $EXAMPLEGOAL and ($k > $EXAMPLEGOAL);
    print " [goal: clusters of size >$k:" if $verbose;
    my $eref;
    my $i = $max || 0;
    my $j = $i +1;
    my $sthcache = "";
    print " $i" if $i && $verbose;
    while ( ($j <= $k) and (my $tmperef = $db->findExample($j, 0, $sthcache)) ) {
        $eref = $tmperef;
	$eref->{response} =~ /^(\d+)\b/;
        my $this = $1 || $j;
        print " $this" if $verbose;
        $i = $this;
	$j = $this +1;
      };
    print "= $eref->{id}" if $verbose && $eref;
    print "]" if $verbose;
    while ( $i ) {
        print " ($i)" if $verbose;
        if ( $eref ) {
            $seen{$eref->{id}} || $db->addOSD("Examples", $eref->{id}."|".$eref->{response})};
        last if $i <= $EXAMPLEGOAL;  # no more examples if max is max...
        $i = $EXAMPLEGOAL if $EXAMPLEGOAL and ($i > $EXAMPLEGOAL);
        print "[10?]" if $verbose;
        ($eref =     $db->findExample($i,  10, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response})
                                                  : (($eref = $db->findExample($i, 1, $sthcache)) && $db->addOSD("Examples", $eref->{id}."|".$eref->{response}),
                                                     last);
        print "[100?]" if $verbose;
        ($eref =    $db->findExample($i,  100, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
        print "[1000?]" if $verbose;
        ($eref =   $db->findExample($i,  1000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
        print "[10000?]" if $verbose;
        ($eref =  $db->findExample($i,  10000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
        print "[100000?]" if $verbose;
        ($eref = $db->findExample($i,  100000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
        print "[1000000?]" if $verbose;
        ($eref = $db->findExample($i, 1000000, $sthcache)) ? $db->addOSD("Examples", $eref->{id}."|".$eref->{response}) : last;
        $i = 0;
      };
    print " done\n";
    return 1;
}

sub policy {
  my ($st) = @_;
  my @xstat;           # future events
  # 1a. timestamps deleted: force reload
  if ( !$st->{"ftime"} ) {
      push(@xstat, [$^T, 'FORCE', "!reload forced (no _ftime)"]);
    }
  elsif ( !$st->{"mtime"} ) {
      push(@xstat, [$^T, 'FORCE', "!reload forced (no _mtime)"]);
    };

  # 1b. force reload by time conditions
  my $polmax = $st->{"rtime"} ? $REVISITFORCE : $NOREVISITFORCE;
  push(@xstat, [$st->{"ftime"} +$polmax, 'DATE', sprintf("+forced reload policy %.1fw", $polmax/3600/24/7)]) if $st->{"ftime"};

  if ( $st->{"rtime"} ) {
  # 2. revisit reasoning: Hold/force reload
      my $text;
      if ( $^T < $st->{"rtime"} ) {
          $text = "-Respect"}
      elsif ( $st->{"ftime"} && ($st->{"ftime"} < $st->{"rtime"}) ) {   # force if never fetched after $rtime
          $text = "+Expired"}
      else {
          $text = "*Stale*"};
      my $sxtime = $st->{"stime"} || $st->{"mtime"};  # prefer TIMESTAMP for difference to REVISIT
      push(@xstat, [$st->{"rtime"}, 'RVST', sprintf("%s REVISIT hint (%s %s)", $text ,tdist($st->{"rtime"} -$sxtime))]);

      if ( $text =~ /Stale/ ) {    # additional age reasoning if Stale Revisit hint
      # 2a. wait at least 5% of age as concluded from expired revisit hint
          my $delta = ($st->{"ftime"} -$st->{"rtime"}) / 20;
          push(@xstat, [$st->{"ftime"} +$delta, 'FREQ', sprintf("age policy 5%% (%s %s)", tdist($delta))]);
        }
    }
  elsif ( $st->{"ftime"} ) {
  # 3. reasoning on file age and update frequency
      if ( my $xstime = $st->{"mtime"} || $st->{"stime"} ) {  # prefer "last modified" over #TIMESTAMP
      # 3a. wait at least 5% of age as concluded from last fetch
          my $delta = ($st->{"ftime"} -$xstime) / 20;
          push(@xstat, [$st->{"ftime"} +$delta, 'FREQ', sprintf("age policy 5%% (%s %s)", tdist($delta))]);
        };
      
      # 3b. wait at least $NOREVISITWAIT before reload
      my $btime = $st->{"ftime"};
      # try to get close to mtime
      $btime = $st->{"mtime"} if $st->{"mtime"} && ($st->{"ftime"} -$st->{"mtime"} < $NOREVISITWAIT);
      push(@xstat, [$btime +$NOREVISITWAIT, 'WAIT', sprintf("revisit policy %.1fh", $NOREVISITWAIT/3600)]);
   };

  # 4. wait at least $RETRYWAIT before next attempt
  push(@xstat, [$st->{"utime"} +$RETRYWAIT, 'WAIT', sprintf("retry policy %.1fh", $RETRYWAIT/3600)]) if $st->{"utime"};

  my ($forced, $xtime, $maxtime) = (0, 0, 0);
  my @conds;
  foreach ( sort {$a->[0] <=> $b->[0]} @xstat ) {
      my($stamp, $code, $message) = @$_;
      $forced = 1 if $message =~ /^!/;
      if ( $stamp < $^T ) {    # expired conditions
          $forced ||= -1 if $message =~ /^\+/;
          push(@conds, [$stamp, $code, $message]);
          $xtime = $stamp;
        }
      else {
          push(@conds, [$stamp, $code, $message]);
          if ( $message =~ /^\+/ ) {
              $maxtime ||= $stamp}
          elsif ( $maxtime ) {
              $xtime = $maxtime}
          else {
              $forced = 0 if $forced < 0;
              $xtime = $stamp;
            }
        }
    };
  return ($forced, $xtime, \@conds);
}

sub lct {
  return "---" unless $_[0];
  if ( $_[0] =~ /^\d+(\.\d+)?$/ ) {
      return strftime("%a, %F %T %z", localtime($_[0]))}
  else {
      return strftime("%a, %F %T %z", localtime(HTTP::Date::str2time($_[0], "GMT")))};
}

sub tdist {
  local($_) = @_;
  my $vz = "+";
  if ( $_ < 0 ) {
      $_ = -$_;
      $vz = "-";
    };
  my @elems = ();

  unshift(@elems, ($_ % 60)."s"); 
  $_ = int($_ / 60) or return vzfix($vz, @elems);

  unshift(@elems, ($_ % 60)."m"); 
  $_ = int($_ / 60) or return vzfix($vz, @elems);

  unshift(@elems, ($_ % 24)."h");
  $_ = int($_ / 24) or return vzfix($vz, @elems);

  my ($y, $m, $w);
  if ( $_ > 365 * 1.5) {
      $y = int($_ / 365); $_ %= 365};
  if ( $_ > 30 * 1.5 ) {
      $m = int($_ / 30); $_ %= 30};
  if ( $_ > 7 * 1.5 ) {
      $w = int($_ / 7); $_ %= 7};
  unshift (@elems, $_."d") if $_;
  unshift (@elems, $w."w") if $w;
  unshift (@elems, $m."M") if $m;
  unshift (@elems, $y."Y") if $y;
  return vzfix($vz, @elems);
}

sub vzfix {
  my $vz = shift @_;
  substr($_[0], 0, 0) = $vz;
  return (@_, "", "", "");
}


=head1 AUTHOR

    Thomas Berger
    CPAN ID: THB
    gymel.com
    THB@cpan.org

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

SeeAlso::Source::BeaconAggregator::Maintenance
SeeAlso::Source::BeaconAggregator::Publisher

=cut

1;

### THE END ###