The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package PDF::Tk;

use IO::All;
use IPC::Open2;
use File::Temp qw/tempdir tempfile/;
use Cwd;
use strict;
our $VERSION='0.02';

sub new {
    my ($proto,%options)=@_;
    my $class= ref $proto || $proto;
    my $self=\%options;
    bless $self,$class;
    die "Can't take both a file and document argument"
     if ($self->{file} && $self->{document});
    $self->{pdftk} ||= "/usr/bin/pdftk";
    $self->_get_document if $self->{file};
    die "Can't find executablable ".$self->{pdftk}
        unless -x $self->{pdftk};
    return $self;
}

sub _get_document {
   my $self=shift;
   my @targets;
   if (ref($self->{file}) eq "ARRAY") {
   	warn "Setting together an arrayref";
   	foreach my $file (@{$self->{file}}) {
	  if (ref($file) eq "SCALAR") {
	  	local $/;
		my ($fh,$filename)=tempfile();
		print $fh $$file; # put data in a file
		$file=$filename; # set the filename
		push @targets,$filename;
	  }
	}
      $self->call_pdftk($self->{file},\($self->{document}),"cat"); 
       die "Could not load ".$self->{file} unless $self->{document}; 
   } else {
       $self->{document} = io($self->{file})->binary->all;
       die "Could not load ".$self->{file} unless $self->{document}; 
   }
   delete $self->{file};
   unlink @targets;
}

sub call_pdftk {
    my ($self,$input,$output,@args)=@_;
    local $/;
    if (ref $input eq "SCALAR" &&  ref $output eq "SCALAR") {
      my ($rdfh,$wrfh);
      my $pid=open2($rdfh,$wrfh,$self->{pdftk},"-",@args,"output","-") 
         or die "pdftk - @args - failed: $?";
      print $wrfh $$input;
      close $wrfh;
      $$output=<$rdfh>;
      close  $rdfh;
      waitpid $pid,0;
    } elsif (ref $input eq "SCALAR") {
      my $fh;
      open($fh,"|-",$self->{pdftk},"-",@args,"output",$output)
         or die $self->{pdftk}." - @args output $output failed: $?";
      print $fh $$input;
      close $fh;
    } elsif (ref $output eq "SCALAR") {
      my $fh;
      open($fh,"-|",$self->{pdftk},(ref $input eq "ARRAY" ? @$input : $input),@args,"output","-")
       or die "pdftk $input @args - failed: $?";
      $$output=<$fh>;
      close $fh;
    } else {
      system($self->{pdftk},(ref $input eq "ARRAY" ? @$input : $input),@args,"output",$output) == 0 
       or die "pdftk $input @args $output failed: $?";
    }
}

sub document {
    my ($self,$doc)=@_;
    if ($doc) { $self->{document}=$doc; }
    else      { return $self->{document}; }
}

sub pages {
    my $self=shift;
    my $tmpdir=tempdir;
    my ($pdftk,@pages);
    chdir $tmpdir;
    $self->call_pdftk(\($self->{document}),'%d.pdf','burst');
    my $page=1;
    while (-f "./$page.pdf") {
        push @pages,io(cwd."/$page.pdf")->binary->all;
	unlink (cwd."/$page.pdf");
	$page++;
    }
    unlink ("doc_data.txt");
    chdir "/";
    rmdir $tmpdir;
    return (wantarray ? @pages :\@pages);
}

sub page {
    my ($self,$page)=@_;
    my $tmpdir=tempdir;
    chdir $tmpdir;
    $self->call_pdftk(\($self->{document}),'%d.pdf','burst');
    my $res=io(cwd."/$page.pdf")->binary->all;
    unlink <*.pdf>;
    unlink ("doc_data.txt");
    chdir "/";
    rmdir $tmpdir;
    return $res;

}

sub docinfo {
    my ($self,$arg)=@_;
    unless ($self->{documentinfo}) {
        my $documentinfo;
        $self->call_pdftk(\($self->{document}),\$documentinfo,"dump_data");
        my @lines=split "\n",$documentinfo;
        my %documentinfo;
        while (my $line=shift @lines) {
            my ($key,$val)=split m/\:\s*/,$line;
            if ($key eq "InfoKey") {
                $key=$val;
                $line=shift @lines;
                ($val)=$line=~m/InfoValue\:\s*(.+)/;
            }
            $documentinfo{lc($key)}=$val;
        }
        $self->{documentinfo}=\%documentinfo;
   }
   return $self->{documentinfo}->{$arg}if ($arg);
   return $self->{documentinfo};
}
            
 
1;

=head1 NAME

PDF::Tk - Perl integration for the pdf toolkit (pdftk)

=head1 SYNOPSIS

  use PDF::Tk;
  my $doc=PDF::Tk->new(file=>["/tmp/my1.pdf","/tmp/my2.pdf"]);
  my @parts=$doc->pages();

=head1 DESCRIPTION

This module is a interface for the command line pdftk command. 

=head1 METHODS

=over 4

=item new

The constructor for the pdftk module. Takes a hash of arguments

    document - a scalar containing a PDF document,
    file - either a PDF filename or a arrayref of filenames.
    pdftf - path to the pdftk binary, defaults to "/usr/bin/pdftk"

note that document and file are mutually exclusive!

=item call_pdftk

Calls up pdftk command, takes input, output and pdftk operation as arguments
input and output can either be files or scalar refs. input can also be an
array ref of files

=item pages

returns an array in list context, or arrayref, containing the content of all
pages in the document.

=item page

Takes a page as an argument, and returns the contents of that page.

=item docinfo

If you provide an argument, it will return that value (lower cased), or 
else it will return a hash of values;
Common values are B<creator> ,B<title>, B<producer>,B<author>, B<moddate>,
B<creationdate>, B<pdfid0>, B<pdfid1>, B<numberofpages>.

=item document

Accessor for the actual document.

=back

=head1 SEE ALSO

L<http://www.accesspdf.com/pdftk/>

=head1 AUTHOR

Marcus Ramberg, E<lt>marcus@mediaflex.noE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2004 by Mediaflex A/S.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut