#! /usr/bin/perl -w
## babble - a simple Babble frontend script
## Copyright (C) 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
##
## This file is part of Babble.
##
## Babble is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; version 2 dated June, 1991.
##
## Babble is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
## for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
use strict;
use Config::IniFiles;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
use Pod::Usage;
use Date::Manip;
use Text::Iconv;
use Babble;
use Babble::Utils;
use Babble::Processors::Extra;
use Babble::Encode;
my $cfgfile = "-";
my $verbose = 0;
my $man = 0;
my $help = 0;
my $version = 0;
GetOptions (
'verbose|v+' => \$verbose,
'debug|d' => sub { $verbose = 3; },
'help|?' => \$help,
'version|V' => \$version,
'man' => \$man
) or pod2usage (2);
pod2usage (-exitstatus => 0, -verbose => 1) if $help;
pod2usage (-exitstatus => 0, -verbose => 2) if $man;
if ($version) {
print "babble (Babble " . $Babble::VERSION . ")\n";
exit 0;
}
if ($verbose >= 3) {
use Carp;
$SIG{__WARN__} = \&carp;
$SIG{__DIE__} = \&confess;
}
$| = 1;
$cfgfile = $ARGV[0] if $ARGV[0];
print "Reading configuration from " . $cfgfile . " ...\n"
if ($verbose >= 2);
my $cfg;
if ($cfgfile eq '-') {
$cfg = Config::IniFiles->new (-file => \*STDIN);
} else {
$cfg = Config::IniFiles->new (-file => $cfgfile);
}
my @feeds = ();
my @themes = ();
my @cbs = ();
my %babble_config = ();
foreach my $param ($cfg->Parameters ('Babble')) {
my $key = $param;
my $val = $cfg->val ('Babble', $param);
if ($param =~ /^\&(.*)/) {
$key = $1;
$val = eval "$val";
}
$babble_config{$key} = $val;
}
push (@cbs, \&cb_collect_start) if ($verbose >= 2);
my $babble = Babble->new (
-cache => {
-cache_fn => $babble_config{cache_fn} ||
$babble_config{cache_db},
-cache_format => $babble_config{cache_format},
},
-callbacks_collect_start => \@cbs,
-processors => [ \&Babble::Processors::Extra::creator_map,
\&Babble::Processors::Extra::parent_map ],
);
print "Recording sources ...\n" if ($verbose >= 2);
foreach my $feed (grep (!/^(theme:|babble$)/i, $cfg->Sections)) {
my %feed_params = (
-location => $feed,
-cache_parsed => $babble_config{cache_parsed},
-cache_only => $babble_config{cache_only},
-preprocessors => [ \&utficate, \&htmlfix ],
-id => $cfg->val ($feed, 'name') || $feed,
);
my $type = $cfg->val ($feed, 'type') || "RSS";
print ' Recording source `' . $feed . '\' (type ' . $type . ") ...\n"
if ($verbose >= 3);
eval "use Babble::DataSource::$type;";
if ($@) {
$type = uc ($type);
eval "use Babble::DataSource::$type;";
die $@ if $@;
}
foreach my $param (grep (!/^(name|type)/, $cfg->Parameters ($feed))) {
my $key = '-' . $param;
my $val = $cfg->val ($feed, $param);
if ($param =~ /^\&(.*)/) {
$key = '-' . $1;
$val = eval "$val";
}
$feed_params{$key} = $val;
}
push (@feeds, "Babble::DataSource::$type"->new (%feed_params));
}
$babble->add_params (%babble_config);
$babble->add_sources (@feeds);
print "Collecting sources ...\n" if ($verbose >= 2);
$babble->collect_feeds ();
print "Caching ...\n" if ($verbose >= 2);
$babble->cache (
-cache_fields => [ 'id', 'date' ]
);
print "Limiting ...\n" if ($verbose >= 2);
my (@collection, $limit);
$limit = $babble_config{days_per_page};
if ($limit) {
@collection = sort { $b->date_iso cmp $a->date_iso }
$babble->search ([{
field => 'date',
pattern => ParseDate (
DateCalc ('today', '-' . $limit .
'days')),
cmp => sub {
my ($a, $b) = @_;
return (Date_Cmp ($a, $b) > 0);
},
}]);
}
@collection = $babble->sort () unless @collection;
$limit = $babble_config{items_per_page} || $babble_config{limit};
delete @collection[$limit..$#collection] if $limit;
$babble->add_params (babble_collection => \@collection);
foreach my $theme (split (/ /, $babble_config{themes})) {
my %theme_config = ();
foreach my $param ($cfg->Parameters ("theme:" . $theme)) {
my $key = $param;
my $val = $cfg->val ('theme:' . $theme, $param);
if ($param =~ /^\&(.*)/) {
$key = $param;
$val = eval "$val";
}
$theme_config{$key} = $val;
}
if ($theme_config{theme}) {
$theme_config{-theme} = $theme_config{theme};
} else {
$theme_config{-theme} = $theme;
}
foreach my $format (split (/ /, $theme_config{formats})) {
my $file;
if ($theme_config{output}) {
$file = $theme_config{output};
} else {
$file = "index." . $format;
}
print "Generating " . $babble_config{output_dir} .
"/" . $file . " ...\n" if $verbose;
$theme_config{-format} = $format;
open (OUTF, ">" . $babble_config{output_dir} .
"/" . $file);
binmode OUTF, ":raw";
print OUTF $babble->output (%theme_config);
close (OUTF);
}
}
sub utficate {
my $text = shift;
my $source_encoding = 'utf-8';
my ($c, $tmp);
if ($$text =~ /<\?.*encoding=[\'"](.*?)[\'"].*\?>/) {
# The above regexp comes from Mark Pilgrim's
# UltraLiberalFeedParser.
$source_encoding = $1;
$$text =~ s/(<\?.*encoding=[\'"])
(.*?)
([\'"].*\?>)/${1}utf-8${3}/x;
}
$c = Text::Iconv->new ($source_encoding, 'utf-8');
$tmp = $c->convert ($$text);
return ($$text = $tmp) if $tmp;
# Right, so the conversion did not go well. Let's go hardcore.
foreach my $encoding ('iso-8859-2', 'iso-8859-1', 'windows-1250') {
$c = Text::Iconv->new ($encoding, 'utf-8');
$tmp = $c->convert ($$text);
return ($$text = $tmp) if $tmp;
}
# We are still here? OH GOD! This must be some broken feed, so
# lets carp a little.
carp "Feed with broken encoding encountered.";
# Then strip all the unsafe chars
$$text =~ s/[^[:ascii:]]/./;
$$text = $c->convert ($$text);
}
sub htmlfix {
my $text = shift;
return unless $$text;
$$text =~ s/ /\ /g;
$$text =~ s/€/\€/g;
$$text =~ s/\&((?!(\w+|#\d+);))/\&$1/g;
$$text =~ s/\<([^\s\>\@]*\@[^\>\s]*)\>/\<$1\>/g;
}
sub cb_collect_start {
my $source = shift;
print ' Collecting: `' . $$source->{-id} . '\' from ' .
$$source->{-location} . "\n";
}
=pod
=head1 NAME
babble - Simplistic Babble frontend
=head1 SYNOPSIS
babble [options] [configfile]
=head1 DESCRIPTION
The babble script is a simple wrapper around the core functionality of
Babble. It can read a config file, which defines an aggregation and a
set of output templates. Using this information, the script will
generate static output.
=head1 OPTIONS
=over 4
=item --debug
Enable debugging output, slightly more verbose than B<--verbose>.
=item --help
Display a help message and exit.
=item --verbose
Enable verbose mode, printing the current state of processing.
=item --version
Print version information and exit.
=back
=head1 CONFIG FILE SYNTAX
The configuration file syntax is pretty easy and
straightforward. There are a few special sections, and so-called feed
sections. The first and most important section is I<Babble>, which
describes the current babble's basic properties. The script recognises
the I<output_dir> and I<themes> variables under this section. All
other variables will be used as parameters in the created Babble
object.
Then, there are the so-called theme descriptor sections. They all
begin with a I<theme:> prefix, and the rest is one of the items in the
I<Babble> section's I<themes> variable. That is, if the configration
file says something like this:
[Babble]
themes = html rss20
Then the two recognised theme descriptor sections are I<theme:html>
and I<theme:rss20>. Each of these sections can contain a I<theme>
variable (defaults to the name of the theme, ie I<html> and I<rss20>
in our example), which specifies the theme to use; I<formats>, which
selects the formats to use (some themes provide multiple output
formats); and I<output>, the name of the output file.
=head1 EXAMPLE
[Babble]
themes = html rss20
output_dir = output
cache_db = feed_cache.db
[theme:html]
theme = planet_gray
output = index.html
[theme:rss20]
theme = XML
output = rss20.xml
=head1 AUTHOR
Gergely Nagy, algernon@bonehunter.rulez.org
Bugs should be reported at L<http://bugs.bonehunter.rulez.org/babble>.
=head1 SEE ALSO
Babble, babble-cache
=cut
# arch-tag: 6ab4db9f-fab6-4947-a24d-34cab756e1d3