The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/bin/perl -w
## babble - a simple Babble frontend script
## Copyright (C) 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
##
## This file is part of Babble.
##
## Babble is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; version 2 dated June, 1991.
##
## Babble is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
## for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

use strict;
use Config::IniFiles;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
use Pod::Usage;
use Date::Manip;
use Text::Iconv;

use Babble;
use Babble::Utils;
use Babble::Processors::Extra;
use Babble::Encode;

my $cfgfile = "-";
my $verbose = 0;
my $man = 0;
my $help = 0;
my $version = 0;
GetOptions (
	'verbose|v+' => \$verbose,
	'debug|d' => sub { $verbose = 3; },
	'help|?' => \$help,
	'version|V' => \$version,
	'man' => \$man
) or pod2usage (2);
pod2usage (-exitstatus => 0, -verbose => 1) if $help;
pod2usage (-exitstatus => 0, -verbose => 2) if $man;
if ($version) {
	print "babble (Babble " . $Babble::VERSION . ")\n";
	exit 0;
}
if ($verbose >= 3) {
	use Carp;
	$SIG{__WARN__} = \&carp;
	$SIG{__DIE__} = \&confess;
}

$| = 1;

$cfgfile = $ARGV[0] if $ARGV[0];
print "Reading configuration from " . $cfgfile . " ...\n"
	if ($verbose >= 2);

my $cfg;
if ($cfgfile eq '-') {
	$cfg = Config::IniFiles->new (-file => \*STDIN);
} else {
	$cfg = Config::IniFiles->new (-file => $cfgfile);
}
my @feeds = ();
my @themes = ();
my @cbs = ();
my %babble_config = ();

foreach my $param ($cfg->Parameters ('Babble')) {
	my $key = $param;
	my $val = $cfg->val ('Babble', $param);
	if ($param =~ /^\&(.*)/) {
		$key = $1;
		$val = eval "$val";
	}
	$babble_config{$key} = $val;
}

push (@cbs, \&cb_collect_start) if ($verbose >= 2);

my $babble = Babble->new (
	-cache => {
		-cache_fn => $babble_config{cache_fn} ||
			$babble_config{cache_db},
		-cache_format => $babble_config{cache_format},
	},
	-callbacks_collect_start => \@cbs,
	-processors => [ \&Babble::Processors::Extra::creator_map,
		 \&Babble::Processors::Extra::parent_map ],
);

print "Recording sources ...\n" if ($verbose >= 2);

foreach my $feed (grep (!/^(theme:|babble$)/i, $cfg->Sections)) {
	my %feed_params = (
		-location => $feed,
		-cache_parsed => $babble_config{cache_parsed},
		-cache_only => $babble_config{cache_only},
		-preprocessors => [ \&utficate, \&htmlfix ],
		-id => $cfg->val ($feed, 'name') || $feed,
	);

	my $type = $cfg->val ($feed, 'type') || "RSS";

	print ' Recording source `' . $feed . '\' (type ' . $type . ") ...\n"
		if ($verbose >= 3);

	eval "use Babble::DataSource::$type;";
	if ($@) {
		$type = uc ($type);
		eval "use Babble::DataSource::$type;";
		die $@ if $@;
	}

	foreach my $param (grep (!/^(name|type)/, $cfg->Parameters ($feed))) {
		my $key = '-' . $param;
		my $val = $cfg->val ($feed, $param);
		if ($param =~ /^\&(.*)/) {
			$key = '-' . $1;
			$val = eval "$val";
		}
		$feed_params{$key} = $val;
	}

	push (@feeds, "Babble::DataSource::$type"->new (%feed_params));
}

$babble->add_params (%babble_config);
$babble->add_sources (@feeds);

print "Collecting sources ...\n" if ($verbose >= 2);

$babble->collect_feeds ();

print "Caching ...\n" if ($verbose >= 2);

$babble->cache (
	-cache_fields => [ 'id', 'date' ]
);

print "Limiting ...\n" if ($verbose >= 2);
my (@collection, $limit);

$limit = $babble_config{days_per_page};
if ($limit) {
	@collection = sort { $b->date_iso cmp $a->date_iso }
		$babble->search ([{
			field => 'date',
			pattern => ParseDate (
				DateCalc ('today', '-' . $limit .
						  'days')),
			cmp => sub {
				my ($a, $b) = @_;
				return (Date_Cmp ($a, $b) > 0);
			},
		}]);
}
@collection = $babble->sort () unless @collection;
$limit = $babble_config{items_per_page} || $babble_config{limit};
delete @collection[$limit..$#collection] if $limit;

$babble->add_params (babble_collection => \@collection);

foreach my $theme (split (/ /, $babble_config{themes})) {
	my %theme_config = ();
	foreach my $param ($cfg->Parameters ("theme:" . $theme)) {
		my $key = $param;
		my $val = $cfg->val ('theme:' . $theme, $param);
		if ($param =~ /^\&(.*)/) {
			$key = $param;
			$val = eval "$val";
		}
		$theme_config{$key} = $val;
	}

	if ($theme_config{theme}) {
		$theme_config{-theme} = $theme_config{theme};
	} else {
		$theme_config{-theme} = $theme;
	}

	foreach my $format (split (/ /, $theme_config{formats})) {
		my $file;

		if ($theme_config{output}) {
			$file = $theme_config{output};
		} else {
			$file = "index." . $format;
		}

		print "Generating " . $babble_config{output_dir} .
			"/" . $file . " ...\n" if $verbose;

		$theme_config{-format} = $format;
		open (OUTF, ">" . $babble_config{output_dir} .
			      "/" . $file);
		binmode OUTF, ":raw";
		print OUTF $babble->output (%theme_config);
		close (OUTF);
	}
}

sub utficate {
	my $text = shift;
	my $source_encoding = 'utf-8';
	my ($c, $tmp);

	if ($$text =~ /<\?.*encoding=[\'"](.*?)[\'"].*\?>/) {
		# The above regexp comes from Mark Pilgrim's
		# UltraLiberalFeedParser.
		$source_encoding = $1;
		$$text =~ s/(<\?.*encoding=[\'"])
			    (.*?)
			    ([\'"].*\?>)/${1}utf-8${3}/x;
	}
	$c = Text::Iconv->new ($source_encoding, 'utf-8');
	$tmp = $c->convert ($$text);

	return ($$text = $tmp) if $tmp;

	# Right, so the conversion did not go well. Let's go hardcore.
	foreach my $encoding ('iso-8859-2', 'iso-8859-1', 'windows-1250') {
		$c = Text::Iconv->new ($encoding, 'utf-8');
		$tmp = $c->convert ($$text);
		return ($$text = $tmp) if $tmp;
	}

	# We are still here? OH GOD! This must be some broken feed, so
	# lets carp a little.
	carp "Feed with broken encoding encountered.";
	# Then strip all the unsafe chars
	$$text =~ s/[^[:ascii:]]/./;
	$$text = $c->convert ($$text);
}

sub htmlfix {
	my $text = shift;

	return unless $$text;

	$$text =~ s/&nbsp;/\&#160;/g;
	$$text =~ s/&euro;/\&#8364;/g;
	$$text =~ s/\&((?!(\w+|#\d+);))/\&amp;$1/g;
	$$text =~ s/\<([^\s\>\@]*\@[^\>\s]*)\>/\&lt;$1\&gt;/g;
}

sub cb_collect_start {
	my $source = shift;

	print ' Collecting: `' . $$source->{-id} . '\' from ' .
		$$source->{-location} . "\n";
}

=pod

=head1 NAME

babble - Simplistic Babble frontend

=head1 SYNOPSIS

babble [options] [configfile]

=head1 DESCRIPTION

The babble script is a simple wrapper around the core functionality of
Babble. It can read a config file, which defines an aggregation and a
set of output templates. Using this information, the script will
generate static output.

=head1 OPTIONS

=over 4

=item --debug

Enable debugging output, slightly more verbose than B<--verbose>.

=item --help

Display a help message and exit.

=item --verbose

Enable verbose mode, printing the current state of processing.

=item --version

Print version information and exit.

=back

=head1 CONFIG FILE SYNTAX

The configuration file syntax is pretty easy and
straightforward. There are a few special sections, and so-called feed
sections. The first and most important section is I<Babble>, which
describes the current babble's basic properties. The script recognises
the I<output_dir> and I<themes> variables under this section. All
other variables will be used as parameters in the created Babble
object.

Then, there are the so-called theme descriptor sections. They all
begin with a I<theme:> prefix, and the rest is one of the items in the
I<Babble> section's I<themes> variable. That is, if the configration
file says something like this:

 [Babble]
 themes = html rss20

Then the two recognised theme descriptor sections are I<theme:html>
and I<theme:rss20>. Each of these sections can contain a I<theme>
variable (defaults to the name of the theme, ie I<html> and I<rss20>
in our example), which specifies the theme to use; I<formats>, which
selects the formats to use (some themes provide multiple output
formats); and I<output>, the name of the output file.

=head1 EXAMPLE

 [Babble]
 themes = html rss20
 output_dir = output
 cache_db = feed_cache.db

 [theme:html]
 theme = planet_gray
 output = index.html

 [theme:rss20]
 theme = XML
 output = rss20.xml

=head1 AUTHOR

Gergely Nagy, algernon@bonehunter.rulez.org

Bugs should be reported at L<http://bugs.bonehunter.rulez.org/babble>.

=head1 SEE ALSO

Babble, babble-cache

=cut

# arch-tag: 6ab4db9f-fab6-4947-a24d-34cab756e1d3