lib/Plucene/SearchEngine/Index/PDF.pm

package Plucene::SearchEngine::Index::PDF;

# Strictures first: catch undeclared variables and questionable constructs
# at compile time.
use strict;
use warnings;

use base 'Plucene::SearchEngine::Index::Base';
use File::Temp qw/tmpnam/;

# Register this backend as the handler for PDF content, by MIME type and by
# file extension.
__PACKAGE__->register_handler("application/pdf", ".pdf");

=head1 NAME

Plucene::SearchEngine::Index::PDF - Backend for parsing PDF

=head1 DESCRIPTION

This backend analyzes a PDF file for its textual content (using C<pdftotext>)
and turns any metadata found in the PDF into Plucene fields.

=cut

# Convert a PDF to HTML with C<pdftotext -htmlmeta> and hand the result to the
# HTML backend's gather_data_from_file so that text and metadata are indexed.
#
# Parameters:
#   $self     - the indexer object (also passed on to the HTML backend)
#   $filename - path of the PDF file to convert
#
# Returns $self on success; returns nothing (bare return) when pdftotext
# fails or produces no output.
sub gather_data_from_file {
    my ($self, $filename) = @_;

    # File::Temp->new creates the file atomically, avoiding the name race
    # inherent in tmpnam(), and removes it when $tmp_html goes out of scope,
    # so the file is cleaned up even if the HTML backend dies.
    my $tmp_html = File::Temp->new;
    my $html     = $tmp_html->filename;

    # Release our handle so pdftotext is the only writer of the file.
    close $tmp_html;

    # List-form system() bypasses the shell, so $filename needs no quoting.
    # A non-zero status covers both "could not run pdftotext" (-1) and a
    # conversion error reported by pdftotext itself.
    my $status = system("pdftotext", "-htmlmeta", $filename, $html);
    return if $status != 0;

    # The temp file already exists, so test for content rather than presence.
    return unless -s $html;

    # NOTE(review): assumes Plucene::SearchEngine::Index::HTML has already
    # been loaded elsewhere; this file does not load it - confirm.
    $self->Plucene::SearchEngine::Index::HTML::gather_data_from_file($html);

    return $self;
}

1;

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)