# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package PDF::OCR2;
use strict;
use PDF::OCR2::Page;
use PDF::OCR2::Base;
use LEOCHARRE::Class2;
use Carp;
use vars qw($VERSION $DEBUG @TRASH $CHECK_PDF $NO_TRASH_CLEANUP $REPAIR_XREF);
# Class-level setup: generate helper methods, then compute the module version.
# NOTE(review): make_accessor_setget / make_count_for are not defined in the
# visible source; presumably they come from LEOCHARRE::Class2 — confirm.
# Presumably installs the abs_path() accessor that _abs_bursts() calls.
__PACKAGE__->make_accessor_setget( 'abs_path', );
#__PACKAGE__->make_accessor_setget_unique_array(')
# Presumably installs _abs_bursts_count(), which text() calls and which is
# aliased to pages_count — confirm.
__PACKAGE__->make_count_for( '_abs_bursts' );
# Pull the digit groups out of the revision keyword string and format them
# as "major.minor" (minor zero-padded to two digits).
$VERSION = sprintf "%d.%02d", q$Revision: 1.21 $ =~ /(\d+)/g;

# Diagnostic logger. When $DEBUG is true, writes the arguments (joined the
# way "@_" interpolates) to STDERR, prefixed with the package name.
# Always returns 1, whether or not anything was printed.
sub debug {
   if( $DEBUG ){
      my $line = __PACKAGE__ . ": @_\n";
      print STDERR $line;
   }
   return 1;
}
# Public method names installed by typeglob assignment: page() runs the same
# code as _page().
*page = \&_page;
# NOTE(review): _abs_bursts_count is not defined in the visible source;
# presumably it is created by the make_count_for('_abs_bursts') call — confirm.
*pages_count = \&_abs_bursts_count;

# Constructor.
#
#   $arg - path to a pdf, as a plain (non-reference) true value
#
# Croaks when $arg is a reference or is missing/false. Returns the new object,
# or nothing (empty list / undef) when PDF::OCR2::Base::get_abs_pdf() yields a
# false value for the path.
sub new {
   my $class = shift;
   my $arg   = shift;

   croak("argument to constructor must be path to pdf") if $arg and ref $arg;
   croak('missing arg to constructor') unless $arg;

   # Blessed before the path lookup on purpose: a failed lookup still
   # discards a blessed object, so DESTROY runs exactly as it always has.
   my $self = bless {}, $class;

   # this checks the pdf with PDF::API2 if PDF::OCR2::CHECK_PDF is set
   my $resolved = PDF::OCR2::Base::get_abs_pdf($arg);
   $self->{abs_path} = $resolved;
   return unless $resolved;

   return $self;
}







# Return the paths produced by bursting this object's pdf.
#
# On the first call the pdf at abs_path is handed to PDF::Burst::pdf_burst()
# and the returned list is cached in $self->{_abs_bursts} and appended to
# @TRASH (which DESTROY unlinks). Later calls reuse the cache.
#
# Returns the list in list context, the cached array ref otherwise.
# Warns and returns nothing when abs_path is false; warns 'error' when the
# burst returns no entries (an empty array ref is still cached).
sub _abs_bursts {
   my $self = shift;

   if( ! $self->{_abs_bursts} ){
      my $abs = $self->abs_path;
      unless( $abs ){
         warn("Cant burst, no abs path");
         return;
      }
      $DEBUG and print STDERR __PACKAGE__."::_abs_bursts() bursting '$abs'.. ";

      require PDF::Burst;
      my @burst_paths = PDF::Burst::pdf_burst($abs);
      @burst_paths or warn('error'); #carp($PDF::Burst::errstr);

      # even if none returned, now contains aref
      $self->{_abs_bursts} = [ @burst_paths ];
      push @TRASH, @burst_paths;

      $DEBUG and print STDERR "Done. Got: @burst_paths\n";
   }

   return wantarray ? @{ $self->{_abs_bursts} } : $self->{_abs_bursts};
}

# Return the PDF::OCR2::Page object for a 1-based page number, creating it on
# first request and caching it in $self->{page}. Also exposed as page().
#
#   $pagenum - page number: digits only, greater than zero
#
# Croaks with "arg must be page number" when $pagenum is undef, empty, or
# contains a non-digit character, and with "No such page num: N" when there is
# no burst file for that page. Dies when the page object cannot be built.
sub _page { # return page object
   my($self,$pagenum) = @_;

   # undef and '' contain no non-digit, so testing for /\D/ alone lets them
   # through; require at least one digit and nothing else.
   ( defined $pagenum and $pagenum =~ /\A\d+\z/ )
      or croak("arg must be page number");

   unless( $self->{page}->{$pagenum} ){
      debug("instancing page object page $pagenum");

      # _abs_bursts returns nothing when there is no abs path; treat that
      # as "no pages" rather than dereferencing undef.
      my $bursts = $self->_abs_bursts || [];

      # Page 0 would become index -1, which Perl resolves to the LAST
      # element, so only index the list for numbers above zero.
      my $abs = $pagenum > 0 ? $bursts->[ $pagenum - 1 ] : undef;
      $abs or croak("No such page num: $pagenum");
      debug($abs);

      my $o =
         PDF::OCR2::Page->new({ abs_pdf => $abs })
         or die("Could not instance PDF::OCR2::Page for $abs");
      $self->{page}->{$pagenum} = $o;
   }
   return $self->{page}->{$pagenum};
}



# Gather the text of every page, in page order, by calling text() on each
# page object from page 1 up to _abs_bursts_count.
#
# List context: the per-page results as a list.
# Otherwise: the results joined with a form feed ("\f").
sub text {
   my $self = shift;

   debug( " bursts count: ". $self->_abs_bursts_count);

   my @texts = map {
      my $pagenum = $_;
      $self->_page($pagenum)->text;
   } 1 .. $self->_abs_bursts_count;

   return @texts if wantarray;
   return join( "\f", @texts);
}

# Length of the document text as returned by text() in scalar context.
sub text_length {
   my $self = shift;
   my $whole_text = $self->text;
   return length $whole_text;
}


# Destructor: removes every file listed in @TRASH, unless $DEBUG or
# $NO_TRASH_CLEANUP is true (then the files are left in place).
#
# NOTE(review): @TRASH is package-wide, so destroying one object also unlinks
# files recorded for other objects that are still alive — confirm this is
# intended before relying on several instances at once.
sub DESTROY {
   # unlink() sets $! for files that are already gone. Destructors can run at
   # arbitrary points, so keep the status globals from leaking into the code
   # that happened to trigger destruction.
   local( $., $@, $!, $^E, $? );

   return if $DEBUG or $NO_TRASH_CLEANUP;

   unlink @TRASH;
   return;
}


1;