# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: /mirror/perl/File-Extract/trunk/lib/File/Extract/PDF.pm 4210 2007-10-27T13:43:07.499967Z daisuke  $
#
# Copyright (c) 2005 Daisuke Maki <dmaki@cpan.org>
# All rights reserved.

package File::Extract::PDF;
use strict;
use base qw(File::Extract::Base);
use CAM::PDF;
use File::Extract::Result;

# Returns the MIME type string handled by this extractor.
sub mime_type
{
    return 'application/pdf';
}
# Extract the text of every page of a PDF file.
#
# Arguments:
#   $file - the PDF to read; handed unchanged to CAM::PDF->new.
#
# Returns a File::Extract::Result built from:
#   text      - the page texts concatenated in page order, passed through
#               $self->recode; the raw text is used instead when recode
#               dies or yields a false value.
#   filename  - $file as given.
#   mime_type - $self->mime_type.
#
# Dies (via Carp::croak) when CAM::PDF cannot load $file.
sub extract
{
    my $self = shift;
    my $file = shift;

    # CAM::PDF->new reports failure by returning a false value and leaving
    # the reason in $CAM::PDF::errstr. Check it here so the caller gets a
    # meaningful error instead of "Can't call method ... on an undefined
    # value" from the page loop below.
    my $doc = CAM::PDF->new($file);
    if (!$doc) {
        require Carp;
        my $reason = defined $CAM::PDF::errstr
                   ? $CAM::PDF::errstr
                   : 'unknown error';
        Carp::croak("Failed to open PDF '$file': $reason");
    }

    my $text = '';
    foreach my $p (1..$doc->numPages()) {
        # Skip pages for which no text comes back (undef) rather than
        # appending an undefined value.
        my $page_text = $doc->getPageText($p);
        $text .= $page_text if defined $page_text;
    }

    # Recoding is best effort: fall back to the raw text when recode fails
    # or returns nothing usable.
    my $recoded = eval { $self->recode($text) };

    return File::Extract::Result->new(
        text      => $recoded || $text,
        filename  => $file,
        mime_type => $self->mime_type
    );
}

1;

__END__

=head1 NAME

File::Extract::PDF - Extract Text From PDF

=cut