The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plucene::SearchEngine::Index::PDF;
use base 'Plucene::SearchEngine::Index::Base';
__PACKAGE__->register_handler("application/pdf", ".pdf");
use File::Temp qw/tmpnam/;

=head1 NAME

Plucene::SearchEngine::Index::PDF - Backend for parsing PDF

=head1 DESCRIPTION

This backend analyzes a PDF file for its textual content (using C<pdftotext>)
and turns any metadata found in the PDF into Plucene fields.

=cut

sub gather_data_from_file {
    my ($self, $filename) = @_;
    my $html = tmpnam();
    system("pdftotext", "-htmlmeta", $filename, $html);
    return unless -e $html;
    $self->Plucene::SearchEngine::Index::HTML::gather_data_from_file($html);
    unlink $html;
    return $self;
}

1;