The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::OpenSearch;

use strict;
use vars qw($VERSION);
$VERSION = '0.05';

use Carp;
use Data::Page;
use LWP::UserAgent;
use URI::Escape;
use XML::RSS::LibXML;
use XML::LibXML;

my @Cols = qw(
Url Format ShortName LongName Description Tags Image SampleSearch
Developer Contact SyndicationRight AdultContent Query
);
for my $col (@Cols) {
    no strict 'refs';
    *$col = sub { shift->{$col} };
}

sub new {
    my($class, $url) = @_;
    $url or croak "Usage: WWW::OpenSearch->new(URL)";
    my $self = bless { DescriptionUrl => $url }, $class;
    $self->_init();
    $self->fetch_description($url);
    $self;
}

sub _init {
    my $self = shift;
    $self->{ua} = LWP::UserAgent->new(agent => "WWW::OpenSearch/$VERSION");
    $self->{pager} = Data::Page->new();
    $self->{pager}->current_page(1);
}

sub ua    { shift->{ua} }
sub pager { shift->{pager} }

sub fetch_description {
    my($self, $url) = @_;
    my $response = $self->ua->get($url);
    croak "Error while fetching $url: ". $response->status_line
	unless $response->is_success;
    eval {
	my $data = $self->parse_description($response->content);
	for my $attr (keys %$data) {
	    $self->{$attr} = $data->{$attr};
	}
    };
    if ($@) {
	croak "Error while parsing Description XML: $@";
    }
}

sub version {
    my $self = shift;
    $self->{version} = shift if @_;
    $self->{version};
}

sub parse_description {
    my $self = shift;
    my($xml) = @_;
    my $parser = XML::LibXML->new;
    my $doc = $parser->parse_string($xml);
    my $element  = $doc->documentElement;
    my $nodename = $element->nodeName;
    croak "Node should be OpenSearchDescription: $nodename"
        if $nodename ne "OpenSearchDescription";

    my $ns = $element->getNamespace->value;
    if ($ns eq "http://a9.com/-/spec/opensearch/1.1/") {
        $self->version("1.1");
    } else {
        $self->version("1.0");
    }

    my %data;
    for my $col (@Cols) {
        my $node = $doc->documentElement->getChildrenByTagName($col) or next;
        if ($self->version eq '1.1' && $col eq 'Url') {
            my $urlnode = ($node->get_nodelist)[0];
            my $type = $urlnode->getAttributeNode('type')->value;
            if ($type ne 'application/rss+xml') {
                croak "Url/\@type $type is not supported by this module. It should be application/rss+xml";
            }
            $data{$col} = $urlnode->getAttributeNode('template')->value;
            $data{$col} =~ s/\?}/}/g; # optional
        } elsif ($self->version eq '1.1' && $col eq 'Query') {
            my $thisnode = ($node->get_nodelist)[0];
            next if $thisnode->getAttributeNode('role')->value eq 'example';
            $data{SampleSearch} = $thisnode->getAttributeNode('searchTerms')->value;
        } else {
            $data{$col} = $node->string_value;
        }
    }

    \%data;
}

sub search {
    my($self, $query) = @_;
    my $url = $self->setup_query($query);

    my $response = $self->ua->get($url);
    croak "Error while fetching $url: ", $response->status_line
	unless $response->is_success;

    my $rss;
    eval {
	$rss = XML::RSS::LibXML->new();
	$rss->add_module(
	    prefix => "openSearch",
	    uri => "http://a9.com/-/spec/opensearchrss/1.0/",
	);
	$rss->parse($response->content);
	if (my $page = $rss->channel->{openSearch}) {
	    $self->pager->total_entries($page->{totalResults});
	    # XXX I don't understand how I parse startIndex attr.
	    $self->pager->entries_per_page($page->{itemsPerPage});
	}
    };
    if ($@) {
	croak "Error while parsing RSS feed: $@";
    }
    return $rss;
}

sub setup_query {
    my($self, $query) = @_;
    my $data;
    $data->{searchTerms} = uri_escape($query);
    $data->{count}       = $self->pager->entries_per_page;
    $data->{startIndex}  = $self->pager->first == 0 ? 0 : $self->pager->first - 1;
    $data->{startPage}   = $self->pager->{current_page}; # XXX hack

    my $url = $self->{Url}; # copy
    $url =~ s/{(searchTerms|count|startIndex|startPage)}/$data->{$1}/g;
    $url;
}

1;
__END__

=head1 NAME

WWW::OpenSearch - Search A9 OpenSearch compatible engines

=head1 SYNOPSIS

  use WWW::OpenSearch;

  my $url = "http://bulkfeeds.net/opensearch.xml";
  my $engine = WWW::OpenSearch->new($url);

  my $name = $engine->ShortName;
  my $tags = $engine->Tags;

  my $feed = $engine->search("iPod");
  for my $item (@{$feed->items}) {
      print $item->{description};
  }

  # page through page 2 with 20 items in each page
  # Note that some engines don't allow changing these values
  $engine->pager->entries_per_page(20);
  $engine->pager->current_page(2);
  my $feed = $engine->search("iPod");

=head1 BETA

This module is in beta version, which means its API interface and functionalities may be changed in future releases.

=head1 DESCRIPTION

WWW::OpenSearch is a module to search A9's OpenSearch compatible search engines. See http://opensearch.a9.com/ for details.

=head1 AUTHOR

Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 SEE ALSO

L<XML::LibXML>, L<XML::RSS::LibXML>, L<Data::Page>, L<LWP>

=cut