The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

use Text::PDF::File;
use Text::PDF::Utils;
use Getopt::Std;


$version = "1.6";       # MJPH  13-JAN-2003     -s2b option supported
#$version = "1.505";     # MJPH   3-AUG-2002     -s comma separated. Allow -ve values in -s
#                                                 merge errors store something, at least!                        
# $version = "1.504";     # MJPH  27-JUN-2002     Use CropBox over MediaBox
# $version = "1.503";     # MJPH  19-FEB-2002     Fix -p1 positioning (again!)
# $version = "1.502";     # MJPH  18-JUN-2001     Add support for -p4s
# $version = "1.501";     # MJPH   2-MAY-2001     Correct positioning of -s;;; type pages
# $version = "1.500";     # MJPH  26-JUL-2000     Correct positioning in some cases and add landscape sizes
# $version = "1.100";     # MJPH   3-AUG-1998     Support new PDF library
# $version = "1.101";     # MJPH  13-OCT-1998     Debug resource merging not being output
# $version = "1.200";     # MJPH   6-NOV-1998     Merging external resources and change -r
# $version = "1.201";     # MJPH  29-DEC-1998     Add test in merge_dict for $v2 not present
# $version = "1.300";     # MJPH  15-JUN-1999     Remove outlines, etc. when making booklet
# $version = "1.302";     # MJPH  30-NOV-1999     Update to use Text::PDF
# $version = "1.302";     # MJPH  11-DEC-1999     Make sure updated root output
# $version = "1.4";       # MJPH   1-FEB-2000     Add -l and preset paper sizes
# $version = "1.401";     # MJPH  28-JUN-2000     Debug content lists and -r

getopts("b:h:lp:qrs:");

if (!defined $ARGV[0])
{
    die <<"EOT";
    PDFBKLT [-b num/size] [-p num] [-q] [-r] [-s size] pdffile

 (c) M. Hosken.     Version: $version

    Converts a PDF file into a booklet. It edits the pdffile to add the
modifications at the end.

  -b num/size    Specifies which page contains the output page size details [1]
            or gives the dimensions of the page in pts (x;y) or A4,ltr,lgl,A5
  -l        Flag to indicate linear scaling
  -p num    Specifies the number of pages on the output page (1, 2, 4) [2]
            If 4, can be 4s to flip on short edge
  -q        Quiet (no on screen messages)
  -r        Rotates the output (-p 2 rotates automatically, -r rotates back)
  -s size   Specifies the the location of the actual text on a page:
                2   half page right and left (big gutter)
                2r  half page always on right
                2l  half page always on left
                2b  dual half pages on right and left 
                            assumes last page opposite first
                4   1/4 page right and left bottom (very big gutter)
                4t  1/4 page right and left top (very big gutter)
                4r/4l/4rt/4lt   1/4 page always on right or left bottom or top
                location as a single string: minx,miny,maxx,maxy in pts
EOT
}

%sizes=(
    'a4' => '595;842',
    'a4l' => '842;595',
    'ltr' => '612;792',
    'ltrl' => '792;612',
    'lgl' => '612;1008',
    'lgll' => '1008;612',
    );

$opt_b = $sizes{lc($opt_b)} if defined $sizes{lc($opt_b)};
print "Reading file\n" unless $opt_q;
$p = Text::PDF::File->open($ARGV[0], 1) || die "Can't open $ARGV[0]";
$r = $p->{'Root'}->realise;
$pgs = $r->{'Pages'}->realise;
$pgcount = $pgs->{'Count'}->val;

foreach (qw(Outlines Dests Threads AcroForm PageLabels StructTreeRoot OpenAction PageMode))
{
    if (defined $r->{$_})
    {
        delete $r->{$_};
        $p->out_obj($r);
    }
}

$pcount = 0;
proc_pages($pgs);

if ($opt_s =~ m/b$/o)
{
    for ($i = 0; $i < $pgcount; $i++)
    {
        my ($pnum) = ($i == 0) ? 0 : 2 * $i - 1;
        my ($ref) = $pglist[$pnum]->copy($p, undef, 1, $p, 'clip' => ["^/Contents", '^/Resources/[^/]+/.*']);
        my ($npnum) = ($i == 0) ? -1 : $pnum + 1;
        $r->{'Pages'}->add_page($ref, $npnum);
        if ($npnum == -1)
        { 
            push (@pglist, $ref); 
            $pglist[$npnum]{' pnum'} = $pgcount * 2;
        }
        else
        { 
            splice(@pglist, $npnum, 0, $ref); 
            $pglist[$npnum]{' pnum'} = $npnum;
        }
        $ref->{' pnum'} = $pnum;
        print STDERR '.' unless $opt_q;
    }
    $pgcount *= 2;
#    $p->append_file;
#    exit(1);
}


$opt_p = 2 unless defined $opt_p;
$opt_r = !$opt_r if (($opt_p == 2 && $opt_s !~ /^2[rltb]*$/oi) || ($opt_p != 2 && $opt_s =~ /^2[rlt]*$/oi));

if ($opt_b =~ m/^([0-9]+)\;([0-9]+)/oi)
{ @pbox = (0, 0, $1, $2); }
else
{
    $opt_b = 1 unless defined $opt_b;
    $opt_b--;

    foreach $n ($pglist[$opt_b]->find_prop('MediaBox')->elementsof)
    { push(@pbox, $n->val); }
}

$fpgins = PDFDict(); $p->new_obj($fpgins);
$spgins = PDFDict(); $p->new_obj($spgins);
$fpgins->{' stream'} = "q";
$spgins->{' stream'} = "Q";

unless ($opt_q)
{
    print "\nThere are $pgcount pages\n";
    print "Page box (pt) = $pbox[0], $pbox[1], $pbox[2], $pbox[3]\n";
}
$rpc = ($opt_p == 1) ? $pgcount : int(($pgcount + 3) / 4) * 4;

for ($i = 0; $i < $rpc / $opt_p; $i++)
{
    if ($opt_p == 1)
    {
        next if $i >= $pgcount;
        @pl = ($pglist[$i]);
        $m = $i + 1;
    }
    elsif ($opt_p == 2)
    {
        @pl = ($pglist[$i], $rpc - $i > $pgcount ? undef : $pglist[$rpc - $i - 1]);
        @pl = ($pl[1], $pl[0]) if ($i & 1);
        $m = ($i + 1) . ", " . ($rpc - $i);
    } else      # $opt_p == 4
    {
        @pl = ($pglist[2 * $i]);
# do these in a special order with increasing difficulty of passing if() requirement
        push (@pl, $pglist[2 * $i + 1]) if (2 * $i + 1 < $pgcount);
        push (@pl, $pglist[$rpc - 2 * $i - 2]) if ($rpc - 2 * $i - 2 < $pgcount);
        push (@pl, $pglist[$rpc - 2 * $i - 1]) if ($rpc - 2 * $i <= $pgcount);
        $m = (2 * $i + 1) . ", " . (2 * $i + 2) . ", " . ($rpc - 2 * $i - 1) . ", " . ($rpc - 2 * $i);
    }

    print "Merging " . $m . "\n" unless $opt_q;
    merge_pages(@pl);
}

$p->append_file;

sub proc_pages
{
    my ($pgs) = @_;
    my ($pgref);

    foreach $pgref ($pgs->{'Kids'}->elementsof)
    {
        print STDERR "." unless $opt_q;
        $pgref->realise;
        if ($pgref->{'Type'}->val =~ m/^Pages$/oi)
        { proc_pages($pgref); }
        else
        {
            push (@pglist, $pgref);
            $pgref->{' pnum'} = $pcount++;
        }
    }
}

sub merge_pages
{
    my (@pr) = @_;
    my ($n, $is, $rl, $bt, $j, $xs, $ys, $scale, $scalestr, $scalestrr, $id, $i);
    my (@slist, @s, $s, $bp, $s1, $s2, $p2, $min, $k);

    $s = undef;
    for ($j = 0; $j <= $#pr; $j++)
    {
        next unless defined $pr[$j];
        @prbox = (); @clipbox = ();
        foreach $n (($pr[$j]->find_prop('CropBox') || $pr[$j]->find_prop('MediaBox'))->elementsof)
        { push(@prbox, $n->val); }
        $is = 1;
        if ($opt_s =~ m/^(-?[0-9]+),(-?[0-9]+),(-?[0-9]+),(-?[0-9]+)/o)
        { @prbox = ($1, $2, $3, $4); }
        elsif ($opt_s =~ m/^([0-9])(.?)(.?)$/o)
        {
            $is = $1;
            $rl = lc($2);
            $bt = lc($3);
#            $rl = ($pr[$j]->{' pnum'} & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o);
            $rl = ($j & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o);
            if ($rl eq "r")
            { $prbox[1] = $prbox[3] - (($prbox[3] - $prbox[1]) / $is); }
            elsif ($rl eq "l")
            { $prbox[3] = $prbox[1] + (($prbox[3] - $prbox[1]) / $is); }

            if ($bt eq "t")
            { $prbox[2] = $prbox[0] + (($prbox[2] - $prbox[0]) * 2 / $is); }
            elsif ($is == 4)
            { $prbox[0] = $prbox[2] - (($prbox[2] - $prbox[0]) * 2 / $is); }
            elsif ($opt_s =~ m/b$/o)
            { @clipbox = ($prbox[0], $prbox[1], $prbox[2] - $prbox[0], $prbox[3] - $prbox[1]); }
        }
        elsif ($opt_s)
        { die "Illegal -s value of $opt_s"; }
        $id = join(',', @prbox) . ",$opt_p";
        if (!defined $scache{$id})
        {
            @slist = ();
            $xs = ($pbox[2] - $pbox[0]) / ($prbox[2] - $prbox[0]);
            $ys = ($pbox[3] - $pbox[1]) / ($prbox[3] - $prbox[1]);
            $scale = ($prbox[3] - $prbox[1]) / ($prbox[2] - $prbox[0]);
            $rot = (($opt_p == 1 || $opt_p == 4) && $is == 2)
                    || ($opt_p == 2 && $is != 2);

            if ($opt_l)
            {
                if ($xs < ($opt_p == 2 ? .5 : 1) * ($opt_r ? $scale * $scale : 1) * $ys)
                { $ys = $xs / ($opt_r ? $scale * $scale : 1) / ($opt_p == 2 ? .5 : 1); }
                else
                { $xs = $ys * ($opt_r ? $scale * $scale : 1) * ($opt_p == 2 ? .5 : 1); }
            }
            
            if ($opt_p == 1 && $is != 2)            # portrait to portrait
            {
                
                $slist[0] = cm($xs, 0, 0, $ys,
#                        $pbox[0] - ($xs * $prbox[0]), $pbox[1] - ($ys * $prbox[1]));
                        .5 * ($pbox[2] + $pbox[0] - $xs * ($prbox[2] + $prbox[0])),
                        .5 * ($pbox[3] + $pbox[1] - $ys * ($prbox[3] + $prbox[1])), @clipbox);
            } elsif ($opt_p == 1)                   # landscape on portrait to portrait
            {
                $slist[0] = cm(0, -$scale * $ys, $xs / $scale, 0,
                        $pbox[0] - $xs * $prbox[1] / $scale, $pbox[1] + $scale * $ys * $prbox[2], @clipbox);
            } elsif ($opt_p == 2 && $is != 2)       # portrait source on portrait
            {
                @scalestr = (0, 0.5 * $ys * $scale, -$xs / $scale, 0,
                        0.5 * ($xs * ($prbox[3] + $prbox[1]) / $scale + $pbox[2]), @clipbox);
                $slist[0] = cm(@scalestr,
#                        0.5 * (-$ys * $scale * $prbox[0] + $pbox[1] + $pbox[3]));
                         .25 * (3 * ($pbox[1] + $pbox[3]) - $ys * $scale * ($prbox[2] + $prbox[0])), @clipbox);
                $slist[1] = cm(@scalestr,
#                        -0.5 * $ys * $scale * $prbox[0] + $pbox[1]);
                         .25 * (3 * $pbox[1] + $pbox[3] - $ys * $scale * ($prbox[2] + $prbox[0])), @clipbox);
            } elsif ($opt_p == 2)                   # double page landscape on portrait
            {
                @scalestr = ($xs, 0, 0, 0.5 * $ys, .5 * ($pbox[2] - $xs * ($prbox[2] - $prbox[0])));
#                -$xs * $prbox[0] + $pbox[0]);
#                $slist[0] = cm(@scalestr, 0.5 * (-$ys * $prbox[1] + $pbox[1] + $pbox[3]), @clipbox);
#                $slist[1] = cm(@scalestr, -0.5 * $ys * $prbox[1] + $pbox[1], @clipbox);
                $slist[0] = cm(@scalestr, .25 * (3 * $pbox[3] + $pbox[1] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox);
                $slist[1] = cm(@scalestr, .25 * (3 * $pbox[1] + $pbox[3] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox);
            } elsif ($opt_p == 4 && $is == 1)       # true portrait
            {
                $a = .5 * $xs; $b = .5 * $ys;
                $slist[0] = cm($a, 0, 0, $b,
                        -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + $pbox[1], @clipbox);
                $slist[2] = cm(-$a, 0, 0, -$b,
                        $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + $pbox[3], @clipbox);
                if ($opt_p =~ /s/o)
                {
                    $slist[1] = cm($a, 0, 0, $b,
                            -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox);
                    $slist[3] = cm(-$a, 0, 0, -$b,
                            $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox);
                }
                else
                {
                    $slist[1] = cm(-$a, 0, 0, -$b,
                            $a * $prbox[0] + $pbox[2], $b * $prbox[1] + $pbox[3], @clipbox);
                    $slist[3] = cm($a, 0, 0, $b,
                            -$a * $prbox[0] + $pbox[0], -$b * $prbox[1] + $pbox[1], @clipbox);
                }
            } elsif ($opt_p == 4)
            {
                $a = .5 * $ys * $scale; $b = .5 * $xs / $scale;
                $slist[0] = cm(0, -$a, $b, 0,
                        -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + $pbox[1], @clipbox);
                $slist[2] = cm(0, $a, -$b, 0,
                        $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + $pbox[3], @clipbox);
                if ($opt_p =~ /s/o)
                {
                    $slist[1] = cm(0, -$a, $b, 0,
                            -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox);
                    $slist[3] = cm(0, $a, -$b, 0,
                            $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox);
                }
                else
                {
                    $slist[1] = cm(0, $a, -$b, 0,
                            $b * $prbox[1] + $pbox[2], -$a * $prbox[2] + $pbox[3], @clipbox);
                    $slist[3] = cm(0, -$a, $b, 0,
                            -$b * $prbox[1] + $pbox[0], $a * $prbox[2] + $pbox[1], @clipbox);
                }
            }
            $scache{$id} = [@slist];
        }
        @s = $pr[$j]->{'Contents'}->elementsof if (defined $pr[$j]->{'Contents'});
        
        if (!defined $s)
        {
            $min = 100000;
            for ($k = 0; $k <= $#pr; $k++)
            {
                next unless defined $pr[$k];
                if ($pr[$k]->{' pnum'} < $min)
                {
                    $bp = $pr[$k];
                    $min = $pr[$k]->{' pnum'};
                }
            }
            $s = PDFArray();
            $bp->{' Contents'} = $p->new_obj($s);
            $bp->{'Rotate'} = PDFNum(90) if ($opt_r);
        }
        next unless defined $pr[$j];
        $s->add_elements($fpgins) unless $j == $#pr;
        $s->add_elements($scache{$id}[$j]) unless ($opt_h == 17 && $j == $#pr);
        $s->add_elements(@s);
        $s->add_elements($spgins) unless $j == $#pr;

        if ($pr[$j] ne $bp)
        {
            $pr2 = $pr[$j];
            $s1 = $bp->find_prop('Resources');
            $s2 = $pr2->find_prop('Resources');
            $bp->{'Resources'} = merge_dict($s1, $s2) unless ($s1 eq $s2);

            $p->free_obj($pr2);
            while (defined $pr2->{'Parent'})
            {
                $temp = $pr2->{'Parent'};
                $temp->{'Kids'}->removeobj($pr2) 
                        if ($pr2->{'Type'}{'val'} eq 'Page' || $pr2->{'Kids'}->elementsof <= 0);
                $temp->{'Count'}{'val'}--;
                $pr2 = $temp;
                if ($pr2->{'Kids'}->elementsof <= 0)
                {
                    print "Killing a tree! $pr2->{'num'}\n" unless $opt_q;
                    $p->free_obj($pr2);
                } else
                { $p->out_obj($pr2); }
            }
        }
    }
    return 1 unless defined $bp;
    $bp->{'Contents'} = delete $bp->{' Contents'};
    foreach (qw(Annots Thumb Beeds CropBox))
    { delete $bp->{$_} if defined $bp->{$_}; }
    $bp->bbox(@pbox);
    $p->out_obj($bp);
}

sub merge_dict
{
    my ($p1, $p2) = @_;

    return $p1 if (ref $p1 eq "Text::PDF::Objind" && ref $p2 eq "Text::PDF::Objind"
            && $p->{' objects'}{$p1->uid}[0] eq $p->{' objects'}{$p2->uid}[0]);
#    $p1 = $p->read_obj($p1) if (ref $p1 eq "Text::PDF::Objind");
#    $p2 = $p->read_obj($p2) if (ref $p2 eq "Text::PDF::Objind");
    $p1->realise;
    $p2->realise;

    my ($k, $v1, $v2);
    my (@a1, @a2, %a1);

    if ($p1->{' isvisited'})
    {
        warn "circular reference!";
        return $p1;
    }
        
    $p1->{' isvisited'} = 1;
    $p2->{' isvisited'} = 1;

    foreach $k (keys %{$p1})
    {
        next if ($k =~ m/^\s/oi);
        $v1 = $p1->{$k};
        $v2 = $p2->{$k};
        next if $v1 eq $v2 || !defined $v2;     # !defined added v1.201
        if (ref $v1 eq "Text::PDF::Objind" || ref $v2 eq "Text::PDF::Objind")
        {
            $v1->realise if (ref $v1 eq "Text::PDF::Objind");
            $v2->realise if (ref $v2 eq "Text::PDF::Objind");
        }
        next unless defined $v2;
        
# assume $v1 & $v2 are of the same type
        if (ref $v1 eq "Text::PDF::Array")
        {
# merge contents of array uniquely (the array is a set)
            @a1 = $v1->elementsof;
            @a2 = $v2->elementsof;
            map { $a1{$_} = 1; } @a1;
            push (@a1, grep (!$a1{$_}, @a2));
            $p1->{$k} = PDFArray(@a1);
        } elsif (ref $v1 eq "Text::PDF::Dict")
#        { $p1->{$k} = merge_dict($v1->val, $v2->val); }
        { $p1->{$k} = merge_dict($v1, $v2); }
        elsif ($v1->val ne $v2->val)
        { 
            warn "Inconsistent dictionaries at $k with " . $v1->val . " and " . $v2->val;
            $p1->{$k} = $v1;
        }
    }

    foreach $k (grep (!defined $p1->{$_} && $_ !~ m/^\s/oi, keys %{$p2}))
    { $p1->{$k} = $p2->{$k}; }
    $p->out_obj($p1) if $p1->is_obj($p);
    delete $p1->{' isvisited'};
    delete $p2->{' isvisited'};
    return $p1;
}


sub cm
{
    my (@a) = @_;
    my ($res, $r, $str);

    foreach $r (@a)
    { $r = int($r) if (abs($r - int($r)) < 1e-6); }
    $str = "$a[6] $a[7] $a[8] $a[9] re W n" if (defined $a[6]);
    return undef if ($a[0] == 1 && $a[1] == 0 && $a[2] == 0 && $a[3] == 1 && $a[4] == 0 && $a[5] == 0 && $str eq '');

    $res = PDFDict();
    $p->new_obj($res);
    $res->{' stream'} = "$a[0] $a[1] $a[2] $a[3] $a[4] $a[5] cm $str";
    $res;
}

sub copy_page
{
    my ($page) = @_;
    return undef unless $page;
    my ($res) = $page->copy;
    $p->new_obj($res);
    
    $res;
}